#include #include #include #include #include #include #include namespace { template struct TNormalizedExamplesHolder { TVector Examples; TNormalizedExamplesHolder() : Examples(N) { TFastRng prng{sizeof(T) * N * 42u}; for (auto& x : Examples) { x = prng.GenRandReal4(); } } }; template struct TExamplesHolder { TVector Examples; TExamplesHolder() : Examples(N) { TFastRng prng{sizeof(T) * N * 42u + 100500u}; for (auto& x : Examples) { // operations with non-normalized floating point numbers are rumored to work slower x = prng.GenRandReal4() + prng.Uniform(1024u); } } }; } #define DEFINE_BENCHMARK(type, count) \ Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ const auto& examples = Default>().Examples; \ for (const auto i : xrange(iface.Iterations())) { \ Y_UNUSED(i); \ Y_DO_NOT_OPTIMIZE_AWAY( \ (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ } \ } \ \ Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ const auto& examples = Default>().Examples; \ for (const auto i : xrange(iface.Iterations())) { \ Y_UNUSED(i); \ Y_DO_NOT_OPTIMIZE_AWAY( \ (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator{})); \ } \ } \ \ Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ const auto& examples = Default>().Examples; \ for (const auto i : xrange(iface.Iterations())) { \ Y_UNUSED(i); \ Y_DO_NOT_OPTIMIZE_AWAY( \ (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ } \ } \ \ Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ const auto& examples = Default>().Examples; \ for (const auto i : xrange(iface.Iterations())) { \ Y_UNUSED(i); \ Y_DO_NOT_OPTIMIZE_AWAY( \ (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator{})); \ } \ } DEFINE_BENCHMARK(float, 2) DEFINE_BENCHMARK(float, 4) DEFINE_BENCHMARK(float, 8) DEFINE_BENCHMARK(float, 16) DEFINE_BENCHMARK(float, 32) DEFINE_BENCHMARK(float, 64) DEFINE_BENCHMARK(float, 128) DEFINE_BENCHMARK(float, 256) DEFINE_BENCHMARK(float, 512) DEFINE_BENCHMARK(float, 1024) DEFINE_BENCHMARK(double, 2) DEFINE_BENCHMARK(double, 4) DEFINE_BENCHMARK(double, 8) DEFINE_BENCHMARK(double, 16) DEFINE_BENCHMARK(double, 32) DEFINE_BENCHMARK(double, 64) DEFINE_BENCHMARK(double, 128) DEFINE_BENCHMARK(double, 256) DEFINE_BENCHMARK(double, 512) DEFINE_BENCHMARK(double, 1024) #undef DEFINE_BENCHMARK