opaque_trie_iterator.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. #pragma once
  2. #include "comptrie_impl.h"
  3. #include "node.h"
  4. #include "key_selector.h"
  5. #include "leaf_skipper.h"
  6. #include <util/generic/vector.h>
  7. #include <util/generic/yexception.h>
  8. namespace NCompactTrie {
  9. class ILeafSkipper;
  10. class TFork { // Auxiliary class for a branching point in the iterator
  11. public:
  12. TNode Node;
  13. const char* Data;
  14. size_t Limit; // valid data is in range [Data + Node.GetOffset(), Data + Limit)
  15. TDirection CurrentDirection;
  16. public:
  17. TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper);
  18. bool operator==(const TFork& rhs) const;
  19. bool HasLabelInKey() const {
  20. return CurrentDirection == D_NEXT || CurrentDirection == D_FINAL;
  21. }
  22. bool NextDirection();
  23. bool PrevDirection();
  24. void LastDirection();
  25. bool HasDirection(TDirection direction) const {
  26. return Node.GetOffsetByDirection(direction);
  27. }
  28. // If the fork doesn't have the specified direction,
  29. // leaves the direction intact and returns false.
  30. // Otherwise returns true.
  31. bool SetDirection(TDirection direction);
  32. TFork NextFork(const ILeafSkipper& skipper) const;
  33. char GetLabel() const;
  34. size_t GetValueOffset() const;
  35. };
  36. inline TFork TFork::NextFork(const ILeafSkipper& skipper) const {
  37. Y_ASSERT(CurrentDirection != D_FINAL);
  38. size_t offset = Node.GetOffsetByDirection(CurrentDirection);
  39. return TFork(Data, offset, Limit, skipper);
  40. }
  41. //------------------------------------------------------------------------------------------------
  42. class TForkStack {
  43. public:
  44. void Push(const TFork& fork) {
  45. if (TopHasLabelInKey()) {
  46. Key.push_back(Top().GetLabel());
  47. }
  48. Forks.push_back(fork);
  49. }
  50. void Pop() {
  51. Forks.pop_back();
  52. if (TopHasLabelInKey()) {
  53. Key.pop_back();
  54. }
  55. }
  56. TFork& Top() {
  57. return Forks.back();
  58. }
  59. const TFork& Top() const {
  60. return Forks.back();
  61. }
  62. bool Empty() const {
  63. return Forks.empty();
  64. }
  65. void Clear() {
  66. Forks.clear();
  67. Key.clear();
  68. }
  69. bool operator==(const TForkStack& other) const {
  70. return Forks == other.Forks;
  71. }
  72. bool operator!=(const TForkStack& other) const {
  73. return !(*this == other);
  74. }
  75. TString GetKey() const;
  76. size_t MeasureKey() const;
  77. private:
  78. TVector<TFork> Forks;
  79. TString Key;
  80. private:
  81. bool TopHasLabelInKey() const {
  82. return !Empty() && Top().HasLabelInKey();
  83. }
  84. bool HasEmptyKey() const;
  85. };
  86. //------------------------------------------------------------------------------------------------
  87. template <class TSymbol>
  88. struct TConvertRawKey {
  89. typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
  90. static TKey Get(const TString& rawkey) {
  91. TKey result;
  92. const size_t sz = rawkey.size();
  93. result.reserve(sz / sizeof(TSymbol));
  94. for (size_t i = 0; i < sz; i += sizeof(TSymbol)) {
  95. TSymbol sym = 0;
  96. for (size_t j = 0; j < sizeof(TSymbol); j++) {
  97. if (sizeof(TSymbol) <= 1)
  98. sym = 0;
  99. else
  100. sym <<= 8;
  101. if (i + j < sz)
  102. sym |= TSymbol(0x00FF & rawkey[i + j]);
  103. }
  104. result.push_back(sym);
  105. }
  106. return result;
  107. }
  108. static size_t Size(size_t rawsize) {
  109. return rawsize / sizeof(TSymbol);
  110. }
  111. };
  112. template <>
  113. struct TConvertRawKey<char> {
  114. static TString Get(const TString& rawkey) {
  115. return rawkey;
  116. }
  117. static size_t Size(size_t rawsize) {
  118. return rawsize;
  119. }
  120. };
  121. //------------------------------------------------------------------------------------------------
  122. class TOpaqueTrieIterator { // Iterator stuff. Stores a stack of visited forks.
  123. public:
  124. TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
  125. size_t maxKeyLength = size_t(-1));
  126. bool operator==(const TOpaqueTrieIterator& rhs) const;
  127. bool operator!=(const TOpaqueTrieIterator& rhs) const {
  128. return !(*this == rhs);
  129. }
  130. bool Forward();
  131. bool Backward();
  132. template <class TSymbol>
  133. bool UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // True if matched exactly.
  134. template <class TSymbol>
  135. typename TCompactTrieKeySelector<TSymbol>::TKey GetKey() const {
  136. return TConvertRawKey<TSymbol>::Get(GetNarrowKey());
  137. }
  138. template <class TSymbol>
  139. size_t MeasureKey() const {
  140. return TConvertRawKey<TSymbol>::Size(MeasureNarrowKey());
  141. }
  142. TString GetNarrowKey() const {
  143. return Forks.GetKey();
  144. }
  145. size_t MeasureNarrowKey() const {
  146. return Forks.MeasureKey();
  147. }
  148. const char* GetValuePtr() const; // 0 if none
  149. const TNode& GetNode() const { // Could be called for non-empty key and not AtEnd.
  150. return Forks.Top().Node;
  151. }
  152. const TOpaqueTrie& GetTrie() const {
  153. return Trie;
  154. }
  155. private:
  156. TOpaqueTrie Trie;
  157. TForkStack Forks;
  158. const char* const EmptyValue;
  159. bool AtEmptyValue;
  160. const size_t MaxKeyLength;
  161. private:
  162. bool HasMaxKeyLength() const;
  163. template <class TSymbol>
  164. int LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // Used in UpperBound.
  165. };
  166. template <class TSymbol>
  167. int TOpaqueTrieIterator::LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
  168. Forks.Clear();
  169. TFork next(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
  170. for (size_t i = 0; i < key.size(); i++) {
  171. TSymbol symbol = key[i];
  172. const bool isLastSymbol = (i + 1 == key.size());
  173. for (i64 shift = (i64)NCompactTrie::ExtraBits<TSymbol>(); shift >= 0; shift -= 8) {
  174. const unsigned char label = (unsigned char)(symbol >> shift);
  175. const bool isLastByte = (isLastSymbol && shift == 0);
  176. do {
  177. Forks.Push(next);
  178. TFork& top = Forks.Top();
  179. if (label < (unsigned char)top.GetLabel()) {
  180. if (!top.SetDirection(D_LEFT))
  181. return 1;
  182. } else if (label > (unsigned char)top.GetLabel()) {
  183. if (!top.SetDirection(D_RIGHT)) {
  184. Forks.Pop(); // We don't pass this fork on the way to the upper bound.
  185. return -1;
  186. }
  187. } else if (isLastByte) { // Here and below label == top.GetLabel().
  188. if (top.SetDirection(D_FINAL)) {
  189. return 0; // Skip the NextFork() call at the end of the cycle.
  190. } else {
  191. top.SetDirection(D_NEXT);
  192. return 1;
  193. }
  194. } else if (!top.SetDirection(D_NEXT)) {
  195. top.SetDirection(D_FINAL);
  196. return -1;
  197. }
  198. next = top.NextFork(Trie.SkipFunction);
  199. } while (Forks.Top().CurrentDirection != D_NEXT); // Proceed to the next byte.
  200. }
  201. }
  202. // We get here only if the key was empty.
  203. Forks.Push(next);
  204. return 1;
  205. }
  206. template <class TSymbol>
  207. bool TOpaqueTrieIterator::UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
  208. Forks.Clear();
  209. if (key.empty() && EmptyValue) {
  210. AtEmptyValue = true;
  211. return true;
  212. } else {
  213. AtEmptyValue = false;
  214. }
  215. const int defect = LongestPrefix<TSymbol>(key);
  216. if (defect > 0) {
  217. // Continue the constructed forks with the smallest key possible.
  218. while (Forks.Top().CurrentDirection != D_FINAL) {
  219. TFork next = Forks.Top().NextFork(Trie.SkipFunction);
  220. Forks.Push(next);
  221. }
  222. } else if (defect < 0) {
  223. Forward();
  224. }
  225. return defect == 0;
  226. }
  227. }