consistent_hashing.cpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. #include "consistent_hashing.h"
  2. #include <library/cpp/pop_count/popcount.h>
  3. #include <util/generic/bitops.h>
  4. /*
  5. * (all numbers are written in big-endian manner: the least significant digit on the right)
  6. * (only bit representations are used - no hex or octal, leading zeroes are ommited)
  7. *
  8. * Consistent hashing scheme:
  9. *
  10. * (sizeof(TValue) * 8, y] (y, 0]
  11. * a = * ablock
  12. * b = * cblock
  13. *
  14. * (sizeof(TValue) * 8, k] (k, 0]
  15. * c = * cblock
  16. *
  17. * d = *
  18. *
  19. * k - is determined by 2^(k-1) < n <= 2^k inequality
  20. * z - is number of ones in cblock
  21. * y - number of digits after first one in cblock
  22. *
  23. * The cblock determines logic of using a- and b- blocks:
  24. *
  25. * bits of cblock | result of a function
  26. * 0 : 0
  27. * 1 : 1 (optimization, the next case includes this one)
  28. * 1?..? : 1ablock (z is even) or 1bblock (z is odd) if possible (<n)
  29. *
  30. * If last case is not possible (>=n), than smooth moving from n=2^(k-1) to n=2^k is applied.
  31. * Using "*" bits of a-,b-,c-,d- blocks ui64 value is combined, modulo of which determines
  32. * if the value should be greather than 2^(k-1) or ConsistentHashing(x, 2^(k-1)) should be used.
  33. * The last case is optimized according to previous checks.
  34. */
  35. namespace {
  36. ui64 PowerOf2(size_t k) {
  37. return (ui64)0x1 << k;
  38. }
  39. template <class TValue>
  40. TValue SelectAOrBBlock(TValue a, TValue b, TValue cBlock) {
  41. size_t z = PopCount<unsigned long long>(cBlock);
  42. bool useABlock = z % 2 == 0;
  43. return useABlock ? a : b;
  44. }
  45. // Gets the exact result for n = k2 = 2 ^ k
  46. template <class TValue>
  47. size_t ConsistentHashingForPowersOf2(TValue a, TValue b, TValue c, ui64 k2) {
  48. TValue cBlock = c & (k2 - 1); // (k, 0] bits of c
  49. // Zero and one cases
  50. if (cBlock < 2) {
  51. // First two cases of result function table: 0 if cblock is 0, 1 if cblock is 1.
  52. return cBlock;
  53. }
  54. size_t y = GetValueBitCount<unsigned long long>(cBlock) - 1; // cblock = 0..01?..? (y = number of digits after 1), y > 0
  55. ui64 y2 = PowerOf2(y); // y2 = 2^y
  56. TValue abBlock = SelectAOrBBlock(a, b, cBlock) & (y2 - 1);
  57. return y2 + abBlock;
  58. }
  59. template <class TValue>
  60. ui64 GetAsteriskBits(TValue a, TValue b, TValue c, TValue d, size_t k) {
  61. size_t shift = sizeof(TValue) * 8 - k;
  62. ui64 res = (d << shift) | (c >> k);
  63. ++shift;
  64. res <<= shift;
  65. res |= b >> (k - 1);
  66. res <<= shift;
  67. res |= a >> (k - 1);
  68. return res;
  69. }
  70. template <class TValue>
  71. size_t ConsistentHashingImpl(TValue a, TValue b, TValue c, TValue d, size_t n) {
  72. Y_ABORT_UNLESS(n > 0, "Can't map consistently to a zero values.");
  73. // Uninteresting case
  74. if (n == 1) {
  75. return 0;
  76. }
  77. size_t k = GetValueBitCount(n - 1); // 2^(k-1) < n <= 2^k, k >= 1
  78. ui64 k2 = PowerOf2(k); // k2 = 2^k
  79. size_t largeValue;
  80. {
  81. // Bit determined variant. Large scheme.
  82. largeValue = ConsistentHashingForPowersOf2(a, b, c, k2);
  83. if (largeValue < n) {
  84. return largeValue;
  85. }
  86. }
  87. // Since largeValue is not assigned yet
  88. // Smooth moving from one bit scheme to another
  89. ui64 k21 = PowerOf2(k - 1);
  90. {
  91. size_t s = GetAsteriskBits(a, b, c, d, k) % (largeValue * (largeValue + 1));
  92. size_t largeValue2 = s / k2 + k21;
  93. if (largeValue2 < n) {
  94. return largeValue2;
  95. }
  96. }
  97. // Bit determined variant. Short scheme.
  98. return ConsistentHashingForPowersOf2(a, b, c, k21); // Do not apply checks. It is always less than k21 = 2^(k-1)
  99. }
  100. }
  101. size_t ConsistentHashing(ui64 x, size_t n) {
  102. ui32 lo = Lo32(x);
  103. ui32 hi = Hi32(x);
  104. return ConsistentHashingImpl<ui16>(Lo16(lo), Hi16(lo), Lo16(hi), Hi16(hi), n);
  105. }
  106. size_t ConsistentHashing(ui64 lo, ui64 hi, size_t n) {
  107. return ConsistentHashingImpl<ui32>(Lo32(lo), Hi32(lo), Lo32(hi), Hi32(hi), n);
  108. }