hyperloglog_ut.cpp 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. #include "hyperloglog.h"
  2. #include <util/generic/buffer.h>
  3. #include <util/random/mersenne.h>
  4. #include <util/stream/buffer.h>
  5. #include <library/cpp/testing/unittest/registar.h>
  6. #include <cmath>
  7. Y_UNIT_TEST_SUITE(THyperLogLog) {
  8. Y_UNIT_TEST(TestPrecision18) {
  9. TMersenne<ui64> rand;
  10. auto counter = THyperLogLog::Create(18);
  11. static const std::pair<ui64, ui64> POINTS[] = {
  12. {10, 10},
  13. {100, 100},
  14. {1000, 998},
  15. {10000, 9978},
  16. {100000, 99995},
  17. {1000000, 997017},
  18. {10000000, 9983891},
  19. {100000000, 100315572},
  20. {1000000000, 998791445},
  21. //1:37: {10000000000, 10015943904}
  22. };
  23. ui64 unique = 0;
  24. for (const auto& pnt : POINTS) {
  25. while (unique < pnt.first) {
  26. const auto val = rand();
  27. counter.Update(val);
  28. ++unique;
  29. }
  30. const auto estimation = counter.Estimate();
  31. const auto delta = i64(estimation) - i64(unique);
  32. const auto error = double(delta) / unique;
  33. UNIT_ASSERT(std::abs(error) < 0.0032);
  34. UNIT_ASSERT_EQUAL(estimation, pnt.second);
  35. }
  36. {
  37. auto counter2 = THyperLogLog::Create(18);
  38. while (unique < 2000000000) {
  39. const auto val = rand();
  40. counter2.Update(val);
  41. ++unique;
  42. }
  43. const auto estimation = counter2.Estimate();
  44. UNIT_ASSERT_EQUAL(estimation, 1000013484);
  45. counter.Merge(counter2);
  46. UNIT_ASSERT_EQUAL(counter.Estimate(), 1998488794);
  47. }
  48. {
  49. TBufferStream stream;
  50. counter.Save(stream);
  51. UNIT_ASSERT_EQUAL(stream.Buffer().Size(), 1 + (1 << 18));
  52. stream.Rewind();
  53. const auto copy = THyperLogLog::Load(stream);
  54. UNIT_ASSERT_EQUAL(counter.Estimate(), copy.Estimate());
  55. }
  56. }
  57. }