bloom.cc 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/filter_policy.h"
  5. #include "leveldb/slice.h"
  6. #include "util/hash.h"
  7. namespace leveldb {
  8. namespace {
  9. static uint32_t BloomHash(const Slice& key) {
  10. return Hash(key.data(), key.size(), 0xbc9f1d34);
  11. }
  12. class BloomFilterPolicy : public FilterPolicy {
  13. public:
  14. explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
  15. // We intentionally round down to reduce probing cost a little bit
  16. k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
  17. if (k_ < 1) k_ = 1;
  18. if (k_ > 30) k_ = 30;
  19. }
  20. const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
  21. void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
  22. // Compute bloom filter size (in both bits and bytes)
  23. size_t bits = n * bits_per_key_;
  24. // For small n, we can see a very high false positive rate. Fix it
  25. // by enforcing a minimum bloom filter length.
  26. if (bits < 64) bits = 64;
  27. size_t bytes = (bits + 7) / 8;
  28. bits = bytes * 8;
  29. const size_t init_size = dst->size();
  30. dst->resize(init_size + bytes, 0);
  31. dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
  32. char* array = &(*dst)[init_size];
  33. for (int i = 0; i < n; i++) {
  34. // Use double-hashing to generate a sequence of hash values.
  35. // See analysis in [Kirsch,Mitzenmacher 2006].
  36. uint32_t h = BloomHash(keys[i]);
  37. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  38. for (size_t j = 0; j < k_; j++) {
  39. const uint32_t bitpos = h % bits;
  40. array[bitpos / 8] |= (1 << (bitpos % 8));
  41. h += delta;
  42. }
  43. }
  44. }
  45. bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
  46. const size_t len = bloom_filter.size();
  47. if (len < 2) return false;
  48. const char* array = bloom_filter.data();
  49. const size_t bits = (len - 1) * 8;
  50. // Use the encoded k so that we can read filters generated by
  51. // bloom filters created using different parameters.
  52. const size_t k = array[len - 1];
  53. if (k > 30) {
  54. // Reserved for potentially new encodings for short bloom filters.
  55. // Consider it a match.
  56. return true;
  57. }
  58. uint32_t h = BloomHash(key);
  59. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  60. for (size_t j = 0; j < k; j++) {
  61. const uint32_t bitpos = h % bits;
  62. if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
  63. h += delta;
  64. }
  65. return true;
  66. }
  67. private:
  68. size_t bits_per_key_;
  69. size_t k_;
  70. };
  71. } // namespace
  72. const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  73. return new BloomFilterPolicy(bits_per_key);
  74. }
  75. } // namespace leveldb