main.cpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. #include <library/cpp/testing/benchmark/bench.h>
  2. #include <util/generic/xrange.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/vector.h>
  5. #include <util/generic/yexception.h>
  6. #include <util/generic/bt_exception.h>
  7. Y_CPU_BENCHMARK(F, iface) {
  8. TVector<size_t> x;
  9. x.reserve(iface.Iterations());
  10. for (size_t i = 0; i < iface.Iterations(); ++i) {
  11. x.push_back(i);
  12. }
  13. }
  14. Y_CPU_BENCHMARK(EmptyF, iface) {
  15. (void)iface;
  16. }
  17. Y_CPU_BENCHMARK(AlmostEmptyF, iface) {
  18. (void)iface;
  19. TVector<size_t> x;
  20. x.resize(1);
  21. }
  22. Y_CPU_BENCHMARK(TestThrow, iface) {
  23. for (size_t i = 0; i < iface.Iterations(); ++i) {
  24. try {
  25. ythrow yexception() << i;
  26. } catch (...) {
  27. //CurrentExceptionMessage();
  28. }
  29. }
  30. }
  31. Y_CPU_BENCHMARK(TestThrowBT, iface) {
  32. for (size_t i = 0; i < iface.Iterations(); ++i) {
  33. try {
  34. ythrow TWithBackTrace<yexception>() << i;
  35. } catch (...) {
  36. //CurrentExceptionMessage();
  37. }
  38. }
  39. }
  40. Y_CPU_BENCHMARK(TestThrowCatch, iface) {
  41. for (size_t i = 0; i < iface.Iterations(); ++i) {
  42. try {
  43. ythrow yexception() << i;
  44. } catch (...) {
  45. Y_DO_NOT_OPTIMIZE_AWAY(CurrentExceptionMessage());
  46. }
  47. }
  48. }
  49. Y_CPU_BENCHMARK(TestThrowCatchBT, iface) {
  50. for (size_t i = 0; i < iface.Iterations(); ++i) {
  51. try {
  52. ythrow TWithBackTrace<yexception>() << i;
  53. } catch (...) {
  54. Y_DO_NOT_OPTIMIZE_AWAY(CurrentExceptionMessage());
  55. }
  56. }
  57. }
  58. Y_CPU_BENCHMARK(TestRobust, iface) {
  59. if (iface.Iterations() % 100 == 0) {
  60. usleep(100000);
  61. }
  62. }
  63. Y_CPU_BENCHMARK(IterationSpeed, iface) {
  64. const auto n = iface.Iterations();
  65. for (size_t i = 0; i < n; ++i) {
  66. Y_DO_NOT_OPTIMIZE_AWAY(i);
  67. }
  68. }
  69. Y_CPU_BENCHMARK(XRangeSpeed, iface) {
  70. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  71. Y_DO_NOT_OPTIMIZE_AWAY(i);
  72. }
  73. }
  74. Y_NO_INLINE int FFF() {
  75. return 0;
  76. }
  77. Y_NO_INLINE int FFF(int x) {
  78. return x;
  79. }
  80. Y_NO_INLINE int FFF(int x, int y) {
  81. return x + y;
  82. }
  83. Y_NO_INLINE size_t FS1(TStringBuf x) {
  84. return x.size();
  85. }
  86. Y_NO_INLINE size_t FS1_2(TStringBuf x, TStringBuf y) {
  87. return x.size() + y.size();
  88. }
  89. Y_NO_INLINE size_t FS2(const TStringBuf& x) {
  90. return x.size();
  91. }
  92. Y_NO_INLINE size_t FS2_2(const TStringBuf& x, const TStringBuf& y) {
  93. return x.size() + y.size();
  94. }
  95. Y_CPU_BENCHMARK(FunctionCallCost_StringBufVal1, iface) {
  96. TStringBuf x;
  97. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  98. (void)i;
  99. NBench::Escape(&x);
  100. Y_DO_NOT_OPTIMIZE_AWAY(FS1(x));
  101. NBench::Clobber();
  102. }
  103. }
  104. Y_CPU_BENCHMARK(FunctionCallCost_StringBufRef1, iface) {
  105. TStringBuf x;
  106. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  107. (void)i;
  108. NBench::Escape(&x);
  109. Y_DO_NOT_OPTIMIZE_AWAY(FS2(x));
  110. NBench::Clobber();
  111. }
  112. }
  113. Y_CPU_BENCHMARK(FunctionCallCost_StringBufVal2, iface) {
  114. TStringBuf x;
  115. TStringBuf y;
  116. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  117. (void)i;
  118. NBench::Escape(&x);
  119. NBench::Escape(&y);
  120. Y_DO_NOT_OPTIMIZE_AWAY(FS1_2(x, y));
  121. NBench::Clobber();
  122. }
  123. }
  124. Y_CPU_BENCHMARK(FunctionCallCost_StringBufRef2, iface) {
  125. TStringBuf x;
  126. TStringBuf y;
  127. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  128. (void)i;
  129. NBench::Escape(&x);
  130. NBench::Escape(&y);
  131. Y_DO_NOT_OPTIMIZE_AWAY(FS2_2(x, y));
  132. NBench::Clobber();
  133. }
  134. }
  135. Y_CPU_BENCHMARK(FunctionCallCost_NoArg, iface) {
  136. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  137. (void)i;
  138. Y_DO_NOT_OPTIMIZE_AWAY(FFF());
  139. }
  140. }
  141. Y_CPU_BENCHMARK(FunctionCallCost_OneArg, iface) {
  142. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  143. Y_DO_NOT_OPTIMIZE_AWAY(FFF(i));
  144. }
  145. }
  146. Y_CPU_BENCHMARK(FunctionCallCost_TwoArg, iface) {
  147. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  148. Y_DO_NOT_OPTIMIZE_AWAY(FFF(i, i));
  149. }
  150. }
  151. /* An example of an incorrect benchmark. As of r2581591, Clang 3.7 produced the following assembly:
  152. * @code
  153. * │ push %rbp
  154. * │ mov %rsp,%rbp
  155. * │ push %rbx
  156. * │ push %rax
  157. * │ mov (%rdi),%rbx
  158. * │ test %rbx,%rbx
  159. * │ ↓ je 25
  160. * │ xor %edi,%edi
  161. * │ xor %esi,%esi
  162. * │ → callq FS1(TBasicStringBuf<char, std::char_traits<char
  163. * │ nop
  164. * 100.00 │20:┌─→dec %rbx
  165. * │ └──jne 20
  166. * │25: add $0x8,%rsp
  167. * │ pop %rbx
  168. * │ pop %rbp
  169. * │ ← retq
  170. * @endcode
  171. *
  172. * So this benchmark is measuring an empty loop!
  173. */
  174. Y_CPU_BENCHMARK(Incorrect_FunctionCallCost_StringBufVal1, iface) {
  175. TStringBuf x;
  176. for (auto i : xrange<size_t>(0, iface.Iterations())) {
  177. (void)i;
  178. Y_DO_NOT_OPTIMIZE_AWAY(FS1(x));
  179. }
  180. }