bytestrieiterator.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2010-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: bytestrieiterator.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2010nov03
  14. * created by: Markus W. Scherer
  15. */
  16. #include "unicode/utypes.h"
  17. #include "unicode/bytestrie.h"
  18. #include "unicode/stringpiece.h"
  19. #include "charstr.h"
  20. #include "uvectr32.h"
  21. U_NAMESPACE_BEGIN
  22. BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
  23. UErrorCode &errorCode)
  24. : bytes_(static_cast<const uint8_t *>(trieBytes)),
  25. pos_(bytes_), initialPos_(bytes_),
  26. remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
  27. str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
  28. if(U_FAILURE(errorCode)) {
  29. return;
  30. }
  31. // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
  32. // a public API header for which we would want it to depend only on
  33. // other public headers.
  34. // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
  35. // via the CharString and UVector32 implementations, so this additional
  36. // cost is minimal.
  37. str_=new CharString();
  38. stack_=new UVector32(errorCode);
  39. if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
  40. errorCode=U_MEMORY_ALLOCATION_ERROR;
  41. }
  42. }
  43. BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
  44. UErrorCode &errorCode)
  45. : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
  46. remainingMatchLength_(trie.remainingMatchLength_),
  47. initialRemainingMatchLength_(trie.remainingMatchLength_),
  48. str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
  49. if(U_FAILURE(errorCode)) {
  50. return;
  51. }
  52. str_=new CharString();
  53. stack_=new UVector32(errorCode);
  54. if(U_FAILURE(errorCode)) {
  55. return;
  56. }
  57. if(str_==nullptr || stack_==nullptr) {
  58. errorCode=U_MEMORY_ALLOCATION_ERROR;
  59. return;
  60. }
  61. int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
  62. if(length>=0) {
  63. // Pending linear-match node, append remaining bytes to str_.
  64. ++length;
  65. if(maxLength_>0 && length>maxLength_) {
  66. length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
  67. }
  68. str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
  69. pos_+=length;
  70. remainingMatchLength_-=length;
  71. }
  72. }
  73. BytesTrie::Iterator::~Iterator() {
  74. delete str_;
  75. delete stack_;
  76. }
  77. BytesTrie::Iterator &
  78. BytesTrie::Iterator::reset() {
  79. pos_=initialPos_;
  80. remainingMatchLength_=initialRemainingMatchLength_;
  81. int32_t length=remainingMatchLength_+1; // Remaining match length.
  82. if(maxLength_>0 && length>maxLength_) {
  83. length=maxLength_;
  84. }
  85. str_->truncate(length);
  86. pos_+=length;
  87. remainingMatchLength_-=length;
  88. stack_->setSize(0);
  89. return *this;
  90. }
  91. UBool
  92. BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
  93. UBool
  94. BytesTrie::Iterator::next(UErrorCode &errorCode) {
  95. if(U_FAILURE(errorCode)) {
  96. return false;
  97. }
  98. const uint8_t *pos=pos_;
  99. if(pos==nullptr) {
  100. if(stack_->isEmpty()) {
  101. return false;
  102. }
  103. // Pop the state off the stack and continue with the next outbound edge of
  104. // the branch node.
  105. int32_t stackSize=stack_->size();
  106. int32_t length=stack_->elementAti(stackSize-1);
  107. pos=bytes_+stack_->elementAti(stackSize-2);
  108. stack_->setSize(stackSize-2);
  109. str_->truncate(length&0xffff);
  110. length=(int32_t)((uint32_t)length>>16);
  111. if(length>1) {
  112. pos=branchNext(pos, length, errorCode);
  113. if(pos==nullptr) {
  114. return true; // Reached a final value.
  115. }
  116. } else {
  117. str_->append((char)*pos++, errorCode);
  118. }
  119. }
  120. if(remainingMatchLength_>=0) {
  121. // We only get here if we started in a pending linear-match node
  122. // with more than maxLength remaining bytes.
  123. return truncateAndStop();
  124. }
  125. for(;;) {
  126. int32_t node=*pos++;
  127. if(node>=kMinValueLead) {
  128. // Deliver value for the byte sequence so far.
  129. UBool isFinal=(UBool)(node&kValueIsFinal);
  130. value_=readValue(pos, node>>1);
  131. if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
  132. pos_=nullptr;
  133. } else {
  134. pos_=skipValue(pos, node);
  135. }
  136. return true;
  137. }
  138. if(maxLength_>0 && str_->length()==maxLength_) {
  139. return truncateAndStop();
  140. }
  141. if(node<kMinLinearMatch) {
  142. if(node==0) {
  143. node=*pos++;
  144. }
  145. pos=branchNext(pos, node+1, errorCode);
  146. if(pos==nullptr) {
  147. return true; // Reached a final value.
  148. }
  149. } else {
  150. // Linear-match node, append length bytes to str_.
  151. int32_t length=node-kMinLinearMatch+1;
  152. if(maxLength_>0 && str_->length()+length>maxLength_) {
  153. str_->append(reinterpret_cast<const char *>(pos),
  154. maxLength_-str_->length(), errorCode);
  155. return truncateAndStop();
  156. }
  157. str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
  158. pos+=length;
  159. }
  160. }
  161. }
  162. StringPiece
  163. BytesTrie::Iterator::getString() const {
  164. return str_ == nullptr ? StringPiece() : str_->toStringPiece();
  165. }
  166. UBool
  167. BytesTrie::Iterator::truncateAndStop() {
  168. pos_=nullptr;
  169. value_=-1; // no real value for str
  170. return true;
  171. }
  172. // Branch node, needs to take the first outbound edge and push state for the rest.
  173. const uint8_t *
  174. BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
  175. while(length>kMaxBranchLinearSubNodeLength) {
  176. ++pos; // ignore the comparison byte
  177. // Push state for the greater-or-equal edge.
  178. stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
  179. stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
  180. // Follow the less-than edge.
  181. length>>=1;
  182. pos=jumpByDelta(pos);
  183. }
  184. // List of key-value pairs where values are either final values or jump deltas.
  185. // Read the first (key, value) pair.
  186. uint8_t trieByte=*pos++;
  187. int32_t node=*pos++;
  188. UBool isFinal=(UBool)(node&kValueIsFinal);
  189. int32_t value=readValue(pos, node>>1);
  190. pos=skipValue(pos, node);
  191. stack_->addElement((int32_t)(pos-bytes_), errorCode);
  192. stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
  193. str_->append((char)trieByte, errorCode);
  194. if(isFinal) {
  195. pos_=nullptr;
  196. value_=value;
  197. return nullptr;
  198. } else {
  199. return pos+value;
  200. }
  201. }
  202. U_NAMESPACE_END