#include #include #include #include #include #include #include #include namespace { // THash::operator() is not inlined Y_NO_INLINE size_t NaiveHash(TCaseInsensitiveStringBuf str) noexcept { TMurmurHash2A hash; for (size_t i = 0; i < str.size(); ++i) { char lower = std::tolower(str[i]); hash.Update(&lower, 1); } return hash.Value(); } [[maybe_unused]] Y_NO_INLINE size_t OptimizedHashV1(TCaseInsensitiveStringBuf str) noexcept { TMurmurHash2A hash; std::array buf; size_t headSize = str.size() - str.size() % buf.size(); for (size_t i = 0; i < headSize; i += buf.size()) { for (size_t j = 0; j < buf.size(); ++j) { buf[j] = std::tolower(str[i + j]); } hash.Update(buf.data(), buf.size()); } for (size_t i = headSize; i < str.size(); ++i) { char lower = std::tolower(str[i]); hash.Update(&lower, 1); } return hash.Value(); } [[maybe_unused]] Y_NO_INLINE size_t OptimizedHashDuplicateTailLoop(TCaseInsensitiveStringBuf str) noexcept { TMurmurHash2A hash; if (str.size() < sizeof(size_t)) { for (size_t i = 0; i < str.size(); ++i) { char lower = std::tolower(str[i]); hash.Update(&lower, 1); } return hash.Value(); } std::array buf; size_t headSize = str.size() - str.size() % buf.size(); for (size_t i = 0; i < headSize; i += buf.size()) { for (size_t j = 0; j < buf.size(); ++j) { buf[j] = std::tolower(str[i + j]); } hash.Update(buf.data(), buf.size()); } for (size_t i = headSize; i < str.size(); ++i) { char lower = std::tolower(str[i]); hash.Update(&lower, 1); } return hash.Value(); } size_t HashTail(TMurmurHash2A& hash, const char* data, size_t size) { for (size_t i = 0; i < size; ++i) { char lower = std::tolower(data[i]); hash.Update(&lower, 1); } return hash.Value(); } [[maybe_unused]] Y_NO_INLINE size_t OptimizedHashDuplicateTailLoopInFunc(TCaseInsensitiveStringBuf str) noexcept { TMurmurHash2A hash; if (str.size() < sizeof(size_t)) { return HashTail(hash, str.data(), str.size()); } std::array buf; size_t headSize = str.size() - str.size() % buf.size(); for (size_t i = 0; i < headSize; i += buf.size()) { for (size_t j = 0; j < buf.size(); ++j) { buf[j] = std::tolower(str[i + j]); } hash.Update(buf.data(), buf.size()); } return HashTail(hash, str.data() + headSize, str.size() - headSize); } // Currently shows the best performance, comparable with NaiveHash for short strings, only slower for empty strings. // Should be equivalent to OptimizedHashV1 after inlining HashTail, but for some reason it's a bit larger but at the same time faster than OptimizedHashV1. [[maybe_unused]] Y_NO_INLINE size_t OptimizedHashTailLoopInFunc(TCaseInsensitiveStringBuf str) noexcept { TMurmurHash2A hash; std::array buf; size_t headSize = str.size() - str.size() % buf.size(); for (size_t i = 0; i < headSize; i += buf.size()) { for (size_t j = 0; j < buf.size(); ++j) { buf[j] = std::tolower(str[i + j]); } hash.Update(buf.data(), buf.size()); } return HashTail(hash, str.data() + headSize, str.size() - headSize); } Y_FORCE_INLINE size_t DefaultHash(TCaseInsensitiveStringBuf str) { return ComputeHash(str); } Y_FORCE_INLINE size_t DefaultHashAscii(TCaseInsensitiveAsciiStringBuf str) { return ComputeHash(str); } } template void CaseInsensitiveHash(benchmark::State& state) { TLocaleGuard loc("C"); Y_ABORT_IF(loc.Error()); SetRandomSeed(123 + state.range()); TBasicString str; for (int i = 0; i < state.range(); ++i) { str.push_back(RandomNumber()); } Y_ENSURE(Impl(str) == NaiveHash(str), "Hashes differ: got " << Impl(str) << ", expected " << NaiveHash(str)); for (auto _ : state) { size_t hash = Impl(str); benchmark::DoNotOptimize(hash); } } template void CaseInsensitiveHashRandomSizes(benchmark::State& state) { TLocaleGuard loc("C"); Y_ABORT_IF(loc.Error()); SetRandomSeed(123); size_t minStrLen = static_cast(state.range(0)); size_t maxStrLen = static_cast(state.range(1)); static constexpr size_t nStrings = 64; TVector stringStorage(Reserve(nStrings)); std::array, nStrings> strings; for (size_t i = 0; i < nStrings; ++i) { auto& str = stringStorage.emplace_back(); size_t strLen = minStrLen + RandomNumber(maxStrLen - minStrLen + 1); for (size_t i = 0; i < strLen; ++i) { str.push_back(RandomNumber()); } strings[i] = str; } for (auto _ : state) { for (auto str : strings) { size_t hash = Impl(str); benchmark::DoNotOptimize(hash); } } } #define BENCH_ARGS ArgName("strlen")->DenseRange(0, sizeof(size_t) + 1)->RangeMultiplier(2)->Range(sizeof(size_t) * 2, 64) BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; #ifdef BENCHMARK_ALL_IMPLS BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; #endif BENCHMARK(CaseInsensitiveHash)->BENCH_ARGS; #undef BENCH_ARGS #define BENCH_ARGS \ ArgNames({"minStrLen", "maxStrLen"}) \ ->ArgPair(4, 16) /*Approximate http header lengths (real distribution may differ)*/ \ ->ArgPair(4, 11) /*1/2 short, 1/2 long to check possible branch mispredictions*/ \ ->ArgPair(1, 8) /*Mostly short strings to check performance regression*/ BENCHMARK(CaseInsensitiveHashRandomSizes)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHashRandomSizes)->BENCH_ARGS; BENCHMARK(CaseInsensitiveHashRandomSizes)->BENCH_ARGS; #undef BENCH_ARGS