// main.cpp (5.4 KB)
  1. #include <library/cpp/accurate_accumulate/accurate_accumulate.h>
  2. #include <library/cpp/testing/benchmark/bench.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/singleton.h>
  5. #include <util/generic/vector.h>
  6. #include <util/generic/xrange.h>
  7. #include <util/random/fast.h>
  8. namespace {
  9. template <typename T, size_t N>
  10. struct TNormalizedExamplesHolder {
  11. TVector<T> Examples;
  12. TNormalizedExamplesHolder()
  13. : Examples(N)
  14. {
  15. TFastRng<ui64> prng{sizeof(T) * N * 42u};
  16. for (auto& x : Examples) {
  17. x = prng.GenRandReal4();
  18. }
  19. }
  20. };
  21. template <typename T, size_t N>
  22. struct TExamplesHolder {
  23. TVector<T> Examples;
  24. TExamplesHolder()
  25. : Examples(N)
  26. {
  27. TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u};
  28. for (auto& x : Examples) {
  29. // operations with non-normalized floating point numbers are rumored to work slower
  30. x = prng.GenRandReal4() + prng.Uniform(1024u);
  31. }
  32. }
  33. };
  34. }
  // DEFINE_BENCHMARK(type, count) emits four CPU benchmarks for one element
  // type and one sample count:
  //   SimpleNorm_<type>_<count> - TNormalizedExamplesHolder data, accumulated into type{}
  //   KahanNorm_<type>_<count>  - TNormalizedExamplesHolder data, accumulated into TKahanAccumulator<type>{}
  //   Simple_<type>_<count>     - TExamplesHolder data, accumulated into type{}
  //   Kahan_<type>_<count>      - TExamplesHolder data, accumulated into TKahanAccumulator<type>{}
  // Each benchmark fetches its data set through Default<...>() before the
  // iteration loop, then runs Accumulate() over the whole range once per
  // iteration, converting the result to `type` and handing it to
  // Y_DO_NOT_OPTIMIZE_AWAY.
  #define DEFINE_BENCHMARK(type, count) \
      Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \
          const auto& samples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \
          for (const auto iteration : xrange(iface.Iterations())) { \
              Y_UNUSED(iteration); \
              Y_DO_NOT_OPTIMIZE_AWAY(static_cast<type>( \
                  Accumulate(std::cbegin(samples), std::cend(samples), type{}))); \
          } \
      } \
      \
      Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \
          const auto& samples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \
          for (const auto iteration : xrange(iface.Iterations())) { \
              Y_UNUSED(iteration); \
              Y_DO_NOT_OPTIMIZE_AWAY(static_cast<type>( \
                  Accumulate(std::cbegin(samples), std::cend(samples), TKahanAccumulator<type>{}))); \
          } \
      } \
      \
      Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \
          const auto& samples = Default<TExamplesHolder<type, count>>().Examples; \
          for (const auto iteration : xrange(iface.Iterations())) { \
              Y_UNUSED(iteration); \
              Y_DO_NOT_OPTIMIZE_AWAY(static_cast<type>( \
                  Accumulate(std::cbegin(samples), std::cend(samples), type{}))); \
          } \
      } \
      \
      Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \
          const auto& samples = Default<TExamplesHolder<type, count>>().Examples; \
          for (const auto iteration : xrange(iface.Iterations())) { \
              Y_UNUSED(iteration); \
              Y_DO_NOT_OPTIMIZE_AWAY(static_cast<type>( \
                  Accumulate(std::cbegin(samples), std::cend(samples), TKahanAccumulator<type>{}))); \
          } \
      }
  71. DEFINE_BENCHMARK(float, 2)
  72. DEFINE_BENCHMARK(float, 4)
  73. DEFINE_BENCHMARK(float, 8)
  74. DEFINE_BENCHMARK(float, 16)
  75. DEFINE_BENCHMARK(float, 32)
  76. DEFINE_BENCHMARK(float, 64)
  77. DEFINE_BENCHMARK(float, 128)
  78. DEFINE_BENCHMARK(float, 256)
  79. DEFINE_BENCHMARK(float, 512)
  80. DEFINE_BENCHMARK(float, 1024)
  81. DEFINE_BENCHMARK(double, 2)
  82. DEFINE_BENCHMARK(double, 4)
  83. DEFINE_BENCHMARK(double, 8)
  84. DEFINE_BENCHMARK(double, 16)
  85. DEFINE_BENCHMARK(double, 32)
  86. DEFINE_BENCHMARK(double, 64)
  87. DEFINE_BENCHMARK(double, 128)
  88. DEFINE_BENCHMARK(double, 256)
  89. DEFINE_BENCHMARK(double, 512)
  90. DEFINE_BENCHMARK(double, 1024)
  91. #undef DEFINE_BENCHMARK