chunked_helpers_trie.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #pragma once
  2. #include <library/cpp/on_disk/chunks/chunked_helpers.h>
  3. #include "comptrie.h"
  4. class TTrieSet {
  5. private:
  6. TCompactTrie<char> Trie;
  7. public:
  8. TTrieSet(const TBlob& blob)
  9. : Trie(blob)
  10. {
  11. }
  12. bool Has(const char* key) const {
  13. return Trie.Find(key, strlen(key));
  14. }
  15. bool FindLongestPrefix(const char* key, size_t keylen, size_t* prefixLen) {
  16. return Trie.FindLongestPrefix(key, keylen, prefixLen);
  17. }
  18. };
  19. template <bool sorted = false>
  20. class TTrieSetWriter {
  21. private:
  22. TCompactTrieBuilder<char> Builder;
  23. public:
  24. TTrieSetWriter(bool isSorted = sorted)
  25. : Builder(isSorted ? CTBF_PREFIX_GROUPED : CTBF_NONE)
  26. {
  27. }
  28. void Add(const char* key, size_t keylen) {
  29. Builder.Add(key, keylen, 0);
  30. assert(Has(((TString)key).substr(0, keylen).data()));
  31. }
  32. void Add(const char* key) {
  33. Add(key, strlen(key));
  34. }
  35. bool Has(const char* key) const {
  36. ui64 dummy;
  37. return Builder.Find(key, strlen(key), &dummy);
  38. }
  39. void Save(IOutputStream& out) const {
  40. Builder.Save(out);
  41. }
  42. void Clear() {
  43. Builder.Clear();
  44. }
  45. };
  46. template <bool isWriter, bool sorted = false>
  47. struct TTrieSetG;
  48. template <bool sorted>
  49. struct TTrieSetG<false, sorted> {
  50. typedef TTrieSet T;
  51. };
  52. template <bool sorted>
  53. struct TTrieSetG<true, sorted> {
  54. typedef TTrieSetWriter<sorted> T;
  55. };
  56. template <typename T>
  57. class TTrieMap {
  58. private:
  59. TCompactTrie<char> Trie;
  60. static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
  61. public:
  62. TTrieMap(const TBlob& blob)
  63. : Trie(blob)
  64. {
  65. }
  66. bool Get(const char* key, T* value) const {
  67. ui64 trieValue;
  68. if (Trie.Find(key, strlen(key), &trieValue)) {
  69. *value = ReadUnaligned<T>(&trieValue);
  70. return true;
  71. } else {
  72. return false;
  73. }
  74. }
  75. T Get(const char* key, T def = T()) const {
  76. ui64 trieValue;
  77. if (Trie.Find(key, strlen(key), &trieValue)) {
  78. return ReadUnaligned<T>(&trieValue);
  79. } else {
  80. return def;
  81. }
  82. }
  83. const TCompactTrie<char>& GetTrie() const {
  84. return Trie;
  85. }
  86. };
  87. template <typename T, bool sorted = false>
  88. class TTrieMapWriter {
  89. private:
  90. typedef TCompactTrieBuilder<char> TBuilder;
  91. TBuilder Builder;
  92. static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
  93. #ifndef NDEBUG
  94. bool IsSorted;
  95. #endif
  96. public:
  97. TTrieMapWriter(bool isSorted = sorted)
  98. : Builder(isSorted ? CTBF_PREFIX_GROUPED : CTBF_NONE)
  99. #ifndef NDEBUG
  100. , IsSorted(isSorted)
  101. #endif
  102. {
  103. }
  104. void Add(const char* key, const T& value) {
  105. ui64 intValue = 0;
  106. memcpy(&intValue, &value, sizeof(T));
  107. Builder.Add(key, strlen(key), intValue);
  108. #ifndef NDEBUG
  109. /*
  110. if (!IsSorted) {
  111. T test;
  112. assert(Get(key, &test) && value == test);
  113. }
  114. */
  115. #endif
  116. }
  117. void Add(const TString& s, const T& value) {
  118. ui64 intValue = 0;
  119. memcpy(&intValue, &value, sizeof(T));
  120. Builder.Add(s.data(), s.size(), intValue);
  121. }
  122. bool Get(const char* key, T* value) const {
  123. ui64 trieValue;
  124. if (Builder.Find(key, strlen(key), &trieValue)) {
  125. *value = ReadUnaligned<T>(&trieValue);
  126. return true;
  127. } else {
  128. return false;
  129. }
  130. }
  131. T Get(const char* key, T def = (T)0) const {
  132. ui64 trieValue;
  133. if (Builder.Find(key, strlen(key), &trieValue)) {
  134. return ReadUnaligned<T>(&trieValue);
  135. } else {
  136. return def;
  137. }
  138. }
  139. void Save(IOutputStream& out, bool minimize = false) const {
  140. if (minimize) {
  141. CompactTrieMinimize<TBuilder>(out, Builder, false);
  142. } else {
  143. Builder.Save(out);
  144. }
  145. }
  146. void Clear() {
  147. Builder.Clear();
  148. }
  149. };
  150. template <typename T>
  151. class TTrieSortedMapWriter {
  152. private:
  153. typedef std::pair<TString, T> TValue;
  154. typedef TVector<TValue> TValues;
  155. TValues Values;
  156. public:
  157. TTrieSortedMapWriter() = default;
  158. void Add(const char* key, const T& value) {
  159. Values.push_back(TValue(key, value));
  160. }
  161. void Save(IOutputStream& out) {
  162. Sort(Values.begin(), Values.end());
  163. TTrieMapWriter<T, true> writer;
  164. for (typename TValues::const_iterator toValue = Values.begin(); toValue != Values.end(); ++toValue)
  165. writer.Add(toValue->first.data(), toValue->second);
  166. writer.Save(out);
  167. }
  168. void Clear() {
  169. Values.clear();
  170. }
  171. };
  172. template <typename X, bool isWriter, bool sorted = false>
  173. struct TTrieMapG;
  174. template <typename X, bool sorted>
  175. struct TTrieMapG<X, false, sorted> {
  176. typedef TTrieMap<X> T;
  177. };
  178. template <typename X, bool sorted>
  179. struct TTrieMapG<X, true, sorted> {
  180. typedef TTrieMapWriter<X, sorted> T;
  181. };