bloom_test.cc 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "gtest/gtest.h"
  5. #include "leveldb/filter_policy.h"
  6. #include "util/coding.h"
  7. #include "util/logging.h"
  8. #include "util/testutil.h"
  9. namespace leveldb {
  // Controls diagnostic output written to stderr by this file: a value >= 1
  // prints false positive statistics, and a value >= 2 also dumps each filter
  // right after it is built.
  static constexpr int kVerbose = 1;
  11. static Slice Key(int i, char* buffer) {
  12. EncodeFixed32(buffer, i);
  13. return Slice(buffer, sizeof(uint32_t));
  14. }
  15. class BloomTest : public testing::Test {
  16. public:
  17. BloomTest() : policy_(NewBloomFilterPolicy(10)) {}
  18. ~BloomTest() { delete policy_; }
  19. void Reset() {
  20. keys_.clear();
  21. filter_.clear();
  22. }
  23. void Add(const Slice& s) { keys_.push_back(s.ToString()); }
  24. void Build() {
  25. std::vector<Slice> key_slices;
  26. for (size_t i = 0; i < keys_.size(); i++) {
  27. key_slices.push_back(Slice(keys_[i]));
  28. }
  29. filter_.clear();
  30. policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
  31. &filter_);
  32. keys_.clear();
  33. if (kVerbose >= 2) DumpFilter();
  34. }
  35. size_t FilterSize() const { return filter_.size(); }
  36. void DumpFilter() {
  37. std::fprintf(stderr, "F(");
  38. for (size_t i = 0; i + 1 < filter_.size(); i++) {
  39. const unsigned int c = static_cast<unsigned int>(filter_[i]);
  40. for (int j = 0; j < 8; j++) {
  41. std::fprintf(stderr, "%c", (c & (1 << j)) ? '1' : '.');
  42. }
  43. }
  44. std::fprintf(stderr, ")\n");
  45. }
  46. bool Matches(const Slice& s) {
  47. if (!keys_.empty()) {
  48. Build();
  49. }
  50. return policy_->KeyMayMatch(s, filter_);
  51. }
  52. double FalsePositiveRate() {
  53. char buffer[sizeof(int)];
  54. int result = 0;
  55. for (int i = 0; i < 10000; i++) {
  56. if (Matches(Key(i + 1000000000, buffer))) {
  57. result++;
  58. }
  59. }
  60. return result / 10000.0;
  61. }
  62. private:
  63. const FilterPolicy* policy_;
  64. std::string filter_;
  65. std::vector<std::string> keys_;
  66. };
  67. TEST_F(BloomTest, EmptyFilter) {
  68. ASSERT_TRUE(!Matches("hello"));
  69. ASSERT_TRUE(!Matches("world"));
  70. }
  71. TEST_F(BloomTest, Small) {
  72. Add("hello");
  73. Add("world");
  74. ASSERT_TRUE(Matches("hello"));
  75. ASSERT_TRUE(Matches("world"));
  76. ASSERT_TRUE(!Matches("x"));
  77. ASSERT_TRUE(!Matches("foo"));
  78. }
  // Returns the value that follows `length` in the test sweep. The increment
  // is 1 below 10, 10 below 100, 100 below 1000, and 1000 from there on.
  static int NextLength(int length) {
    int step = 1000;
    if (length < 10) {
      step = 1;
    } else if (length < 100) {
      step = 10;
    } else if (length < 1000) {
      step = 100;
    }
    return length + step;
  }
  91. TEST_F(BloomTest, VaryingLengths) {
  92. char buffer[sizeof(int)];
  93. // Count number of filters that significantly exceed the false positive rate
  94. int mediocre_filters = 0;
  95. int good_filters = 0;
  96. for (int length = 1; length <= 10000; length = NextLength(length)) {
  97. Reset();
  98. for (int i = 0; i < length; i++) {
  99. Add(Key(i, buffer));
  100. }
  101. Build();
  102. ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
  103. << length;
  104. // All added keys must match
  105. for (int i = 0; i < length; i++) {
  106. ASSERT_TRUE(Matches(Key(i, buffer)))
  107. << "Length " << length << "; key " << i;
  108. }
  109. // Check false positive rate
  110. double rate = FalsePositiveRate();
  111. if (kVerbose >= 1) {
  112. std::fprintf(stderr,
  113. "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
  114. rate * 100.0, length, static_cast<int>(FilterSize()));
  115. }
  116. ASSERT_LE(rate, 0.02); // Must not be over 2%
  117. if (rate > 0.0125)
  118. mediocre_filters++; // Allowed, but not too often
  119. else
  120. good_filters++;
  121. }
  122. if (kVerbose >= 1) {
  123. std::fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
  124. mediocre_filters);
  125. }
  126. ASSERT_LE(mediocre_filters, good_filters / 5);
  127. }
  // TODO: add tests for different bits-per-key settings; the fixture above
  // always uses NewBloomFilterPolicy(10).
  129. } // namespace leveldb