opaque_trie_iterator.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. #include "opaque_trie_iterator.h"
  2. #include "comptrie_impl.h"
  3. #include "node.h"
  4. namespace NCompactTrie {
  5. TOpaqueTrieIterator::TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
  6. size_t maxKeyLength)
  7. : Trie(trie)
  8. , EmptyValue(emptyValue)
  9. , AtEmptyValue(emptyValue && !atend)
  10. , MaxKeyLength(maxKeyLength)
  11. {
  12. if (!AtEmptyValue && !atend)
  13. Forward();
  14. }
  15. bool TOpaqueTrieIterator::operator==(const TOpaqueTrieIterator& rhs) const {
  16. return (Trie == rhs.Trie &&
  17. Forks == rhs.Forks &&
  18. EmptyValue == rhs.EmptyValue &&
  19. AtEmptyValue == rhs.AtEmptyValue &&
  20. MaxKeyLength == rhs.MaxKeyLength);
  21. }
  22. bool TOpaqueTrieIterator::HasMaxKeyLength() const {
  23. return MaxKeyLength != size_t(-1) && MeasureNarrowKey() == MaxKeyLength;
  24. }
  25. bool TOpaqueTrieIterator::Forward() {
  26. if (AtEmptyValue) {
  27. AtEmptyValue = false;
  28. bool res = Forward(); // TODO delete this after format change
  29. if (res && MeasureNarrowKey() != 0) {
  30. return res; // there was not "\0" key
  31. }
  32. // otherwise we are skipping "\0" key
  33. }
  34. if (!Trie.Length)
  35. return false;
  36. if (Forks.Empty()) {
  37. TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
  38. Forks.Push(fork);
  39. } else {
  40. TFork* topFork = &Forks.Top();
  41. while (!topFork->NextDirection()) {
  42. if (topFork->Node.GetOffset() >= Trie.Length)
  43. return false;
  44. Forks.Pop();
  45. if (Forks.Empty())
  46. return false;
  47. topFork = &Forks.Top();
  48. }
  49. }
  50. Y_ASSERT(!Forks.Empty());
  51. while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
  52. TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
  53. Forks.Push(nextFork);
  54. }
  55. TFork& top = Forks.Top();
  56. static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
  57. if (HasMaxKeyLength() && top.CurrentDirection == D_FINAL && top.HasDirection(D_NEXT)) {
  58. top.NextDirection();
  59. }
  60. return true;
  61. }
  62. bool TOpaqueTrieIterator::Backward() {
  63. if (AtEmptyValue)
  64. return false;
  65. if (!Trie.Length) {
  66. if (EmptyValue) {
  67. // A trie that has only the empty value;
  68. // we are not at the empty value, so move to it.
  69. AtEmptyValue = true;
  70. return true;
  71. } else {
  72. // Empty trie.
  73. return false;
  74. }
  75. }
  76. if (Forks.Empty()) {
  77. TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
  78. fork.LastDirection();
  79. Forks.Push(fork);
  80. } else {
  81. TFork* topFork = &Forks.Top();
  82. while (!topFork->PrevDirection()) {
  83. if (topFork->Node.GetOffset() >= Trie.Length)
  84. return false;
  85. Forks.Pop();
  86. if (!Forks.Empty()) {
  87. topFork = &Forks.Top();
  88. } else {
  89. // When there are no more forks,
  90. // we have to iterate over the empty value.
  91. if (!EmptyValue)
  92. return false;
  93. AtEmptyValue = true;
  94. return true;
  95. }
  96. }
  97. }
  98. Y_ASSERT(!Forks.Empty());
  99. while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
  100. TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
  101. nextFork.LastDirection();
  102. Forks.Push(nextFork);
  103. }
  104. TFork& top = Forks.Top();
  105. static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
  106. if (HasMaxKeyLength() && top.CurrentDirection == D_NEXT && top.HasDirection(D_FINAL)) {
  107. top.PrevDirection();
  108. }
  109. if (MeasureNarrowKey() == 0) {
  110. // This is the '\0' key, skip it and get to the EmptyValue.
  111. AtEmptyValue = true;
  112. Forks.Clear();
  113. }
  114. return true;
  115. }
  116. const char* TOpaqueTrieIterator::GetValuePtr() const {
  117. if (!Forks.Empty()) {
  118. const TFork& lastFork = Forks.Top();
  119. Y_ASSERT(lastFork.Node.IsFinal() && lastFork.CurrentDirection == D_FINAL);
  120. return Trie.Data + lastFork.GetValueOffset();
  121. }
  122. Y_ASSERT(AtEmptyValue);
  123. return EmptyValue;
  124. }
  125. //-------------------------------------------------------------------------
  126. TString TForkStack::GetKey() const {
  127. if (HasEmptyKey()) {
  128. return TString();
  129. }
  130. TString result(Key);
  131. if (TopHasLabelInKey()) {
  132. result.append(Top().GetLabel());
  133. }
  134. return result;
  135. }
  136. bool TForkStack::HasEmptyKey() const {
  137. // Special case: if we get a single zero label, treat it as an empty key
  138. // TODO delete this after format change
  139. if (TopHasLabelInKey()) {
  140. return Key.size() == 0 && Top().GetLabel() == '\0';
  141. } else {
  142. return Key.size() == 1 && Key[0] == '\0';
  143. }
  144. }
  145. size_t TForkStack::MeasureKey() const {
  146. size_t result = Key.size() + (TopHasLabelInKey() ? 1 : 0);
  147. if (result == 1 && HasEmptyKey()) {
  148. return 0;
  149. }
  150. return result;
  151. }
  152. //-------------------------------------------------------------------------
  153. TFork::TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper)
  154. : Node(data, offset, skipper)
  155. , Data(data)
  156. , Limit(limit)
  157. , CurrentDirection(TDirection(0))
  158. {
  159. #if COMPTRIE_DATA_CHECK
  160. if (Node.GetOffset() >= Limit - 1)
  161. ythrow yexception() << "gone beyond the limit, data is corrupted";
  162. #endif
  163. while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection)) {
  164. ++CurrentDirection;
  165. }
  166. }
  167. bool TFork::operator==(const TFork& rhs) const {
  168. return (Data == rhs.Data &&
  169. Node.GetOffset() == rhs.Node.GetOffset() &&
  170. CurrentDirection == rhs.CurrentDirection);
  171. }
  172. inline bool TFork::NextDirection() {
  173. do {
  174. ++CurrentDirection;
  175. } while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection));
  176. return CurrentDirection < D_MAX;
  177. }
  178. inline bool TFork::PrevDirection() {
  179. if (CurrentDirection == TDirection(0)) {
  180. return false;
  181. }
  182. do {
  183. --CurrentDirection;
  184. } while (CurrentDirection > 0 && !HasDirection(CurrentDirection));
  185. return HasDirection(CurrentDirection);
  186. }
  187. void TFork::LastDirection() {
  188. CurrentDirection = D_MAX;
  189. PrevDirection();
  190. }
  191. bool TFork::SetDirection(TDirection direction) {
  192. if (!HasDirection(direction)) {
  193. return false;
  194. }
  195. CurrentDirection = direction;
  196. return true;
  197. }
  198. char TFork::GetLabel() const {
  199. return Node.GetLabel();
  200. }
  201. size_t TFork::GetValueOffset() const {
  202. return Node.GetLeafOffset();
  203. }
  204. }