stringtriebuilder.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2010-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: stringtriebuilder.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2010dec24
  14. * created by: Markus W. Scherer
  15. */
  16. #include "utypeinfo.h" // for 'typeid' to work
  17. #include "unicode/utypes.h"
  18. #include "unicode/stringtriebuilder.h"
  19. #include "uassert.h"
  20. #include "uhash.h"
  21. U_CDECL_BEGIN
  22. static int32_t U_CALLCONV
  23. hashStringTrieNode(const UHashTok key) {
  24. return icu::StringTrieBuilder::hashNode(key.pointer);
  25. }
  26. static UBool U_CALLCONV
  27. equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
  28. return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
  29. }
  30. U_CDECL_END
  31. U_NAMESPACE_BEGIN
  32. StringTrieBuilder::StringTrieBuilder() : nodes(nullptr) {}
  33. StringTrieBuilder::~StringTrieBuilder() {
  34. deleteCompactBuilder();
  35. }
  36. void
  37. StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
  38. if(U_FAILURE(errorCode)) {
  39. return;
  40. }
  41. nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, nullptr,
  42. sizeGuess, &errorCode);
  43. if(U_SUCCESS(errorCode)) {
  44. if(nodes==nullptr) {
  45. errorCode=U_MEMORY_ALLOCATION_ERROR;
  46. } else {
  47. uhash_setKeyDeleter(nodes, uprv_deleteUObject);
  48. }
  49. }
  50. }
  51. void
  52. StringTrieBuilder::deleteCompactBuilder() {
  53. uhash_close(nodes);
  54. nodes=nullptr;
  55. }
  56. void
  57. StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
  58. UErrorCode &errorCode) {
  59. if(buildOption==USTRINGTRIE_BUILD_FAST) {
  60. writeNode(0, elementsLength, 0);
  61. } else /* USTRINGTRIE_BUILD_SMALL */ {
  62. createCompactBuilder(2*elementsLength, errorCode);
  63. Node *root=makeNode(0, elementsLength, 0, errorCode);
  64. if(U_SUCCESS(errorCode)) {
  65. root->markRightEdgesFirst(-1);
  66. root->write(*this);
  67. }
  68. deleteCompactBuilder();
  69. }
  70. }
  71. // Requires start<limit,
  72. // and all strings of the [start..limit[ elements must be sorted and
  73. // have a common prefix of length unitIndex.
  74. int32_t
  75. StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
  76. UBool hasValue=false;
  77. int32_t value=0;
  78. int32_t type;
  79. if(unitIndex==getElementStringLength(start)) {
  80. // An intermediate or final value.
  81. value=getElementValue(start++);
  82. if(start==limit) {
  83. return writeValueAndFinal(value, true); // final-value node
  84. }
  85. hasValue=true;
  86. }
  87. // Now all [start..limit[ strings are longer than unitIndex.
  88. int32_t minUnit=getElementUnit(start, unitIndex);
  89. int32_t maxUnit=getElementUnit(limit-1, unitIndex);
  90. if(minUnit==maxUnit) {
  91. // Linear-match node: All strings have the same character at unitIndex.
  92. int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
  93. writeNode(start, limit, lastUnitIndex);
  94. // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
  95. int32_t length=lastUnitIndex-unitIndex;
  96. int32_t maxLinearMatchLength=getMaxLinearMatchLength();
  97. while(length>maxLinearMatchLength) {
  98. lastUnitIndex-=maxLinearMatchLength;
  99. length-=maxLinearMatchLength;
  100. writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
  101. write(getMinLinearMatch()+maxLinearMatchLength-1);
  102. }
  103. writeElementUnits(start, unitIndex, length);
  104. type=getMinLinearMatch()+length-1;
  105. } else {
  106. // Branch node.
  107. int32_t length=countElementUnits(start, limit, unitIndex);
  108. // length>=2 because minUnit!=maxUnit.
  109. writeBranchSubNode(start, limit, unitIndex, length);
  110. if(--length<getMinLinearMatch()) {
  111. type=length;
  112. } else {
  113. write(length);
  114. type=0;
  115. }
  116. }
  117. return writeValueAndType(hasValue, value, type);
  118. }
  119. // start<limit && all strings longer than unitIndex &&
  120. // length different units at unitIndex
  121. int32_t
  122. StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
  123. char16_t middleUnits[kMaxSplitBranchLevels];
  124. int32_t lessThan[kMaxSplitBranchLevels];
  125. int32_t ltLength=0;
  126. while(length>getMaxBranchLinearSubNodeLength()) {
  127. // Branch on the middle unit.
  128. // First, find the middle unit.
  129. int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
  130. // Encode the less-than branch first.
  131. middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
  132. lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
  133. ++ltLength;
  134. // Continue for the greater-or-equal branch.
  135. start=i;
  136. length=length-length/2;
  137. }
  138. // For each unit, find its elements array start and whether it has a final value.
  139. int32_t starts[kMaxBranchLinearSubNodeLength];
  140. UBool isFinal[kMaxBranchLinearSubNodeLength-1];
  141. int32_t unitNumber=0;
  142. do {
  143. int32_t i=starts[unitNumber]=start;
  144. char16_t unit=getElementUnit(i++, unitIndex);
  145. i=indexOfElementWithNextUnit(i, unitIndex, unit);
  146. isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
  147. start=i;
  148. } while(++unitNumber<length-1);
  149. // unitNumber==length-1, and the maxUnit elements range is [start..limit[
  150. starts[unitNumber]=start;
  151. // Write the sub-nodes in reverse order: The jump lengths are deltas from
  152. // after their own positions, so if we wrote the minUnit sub-node first,
  153. // then its jump delta would be larger.
  154. // Instead we write the minUnit sub-node last, for a shorter delta.
  155. int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
  156. do {
  157. --unitNumber;
  158. if(!isFinal[unitNumber]) {
  159. jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
  160. }
  161. } while(unitNumber>0);
  162. // The maxUnit sub-node is written as the very last one because we do
  163. // not jump for it at all.
  164. unitNumber=length-1;
  165. writeNode(start, limit, unitIndex+1);
  166. int32_t offset=write(getElementUnit(start, unitIndex));
  167. // Write the rest of this node's unit-value pairs.
  168. while(--unitNumber>=0) {
  169. start=starts[unitNumber];
  170. int32_t value;
  171. if(isFinal[unitNumber]) {
  172. // Write the final value for the one string ending with this unit.
  173. value=getElementValue(start);
  174. } else {
  175. // Write the delta to the start position of the sub-node.
  176. value=offset-jumpTargets[unitNumber];
  177. }
  178. writeValueAndFinal(value, isFinal[unitNumber]);
  179. offset=write(getElementUnit(start, unitIndex));
  180. }
  181. // Write the split-branch nodes.
  182. while(ltLength>0) {
  183. --ltLength;
  184. writeDeltaTo(lessThan[ltLength]);
  185. offset=write(middleUnits[ltLength]);
  186. }
  187. return offset;
  188. }
  189. // Requires start<limit,
  190. // and all strings of the [start..limit[ elements must be sorted and
  191. // have a common prefix of length unitIndex.
  192. StringTrieBuilder::Node *
  193. StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
  194. if(U_FAILURE(errorCode)) {
  195. return nullptr;
  196. }
  197. UBool hasValue=false;
  198. int32_t value=0;
  199. if(unitIndex==getElementStringLength(start)) {
  200. // An intermediate or final value.
  201. value=getElementValue(start++);
  202. if(start==limit) {
  203. return registerFinalValue(value, errorCode);
  204. }
  205. hasValue=true;
  206. }
  207. Node *node;
  208. // Now all [start..limit[ strings are longer than unitIndex.
  209. int32_t minUnit=getElementUnit(start, unitIndex);
  210. int32_t maxUnit=getElementUnit(limit-1, unitIndex);
  211. if(minUnit==maxUnit) {
  212. // Linear-match node: All strings have the same character at unitIndex.
  213. int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
  214. Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
  215. // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
  216. int32_t length=lastUnitIndex-unitIndex;
  217. int32_t maxLinearMatchLength=getMaxLinearMatchLength();
  218. while(length>maxLinearMatchLength) {
  219. lastUnitIndex-=maxLinearMatchLength;
  220. length-=maxLinearMatchLength;
  221. node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
  222. nextNode=registerNode(node, errorCode);
  223. }
  224. node=createLinearMatchNode(start, unitIndex, length, nextNode);
  225. } else {
  226. // Branch node.
  227. int32_t length=countElementUnits(start, limit, unitIndex);
  228. // length>=2 because minUnit!=maxUnit.
  229. Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
  230. node=new BranchHeadNode(length, subNode);
  231. }
  232. if(hasValue && node!=nullptr) {
  233. if(matchNodesCanHaveValues()) {
  234. ((ValueNode *)node)->setValue(value);
  235. } else {
  236. node=new IntermediateValueNode(value, registerNode(node, errorCode));
  237. }
  238. }
  239. return registerNode(node, errorCode);
  240. }
  241. // start<limit && all strings longer than unitIndex &&
  242. // length different units at unitIndex
  243. StringTrieBuilder::Node *
  244. StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
  245. int32_t length, UErrorCode &errorCode) {
  246. if(U_FAILURE(errorCode)) {
  247. return nullptr;
  248. }
  249. char16_t middleUnits[kMaxSplitBranchLevels];
  250. Node *lessThan[kMaxSplitBranchLevels];
  251. int32_t ltLength=0;
  252. while(length>getMaxBranchLinearSubNodeLength()) {
  253. // Branch on the middle unit.
  254. // First, find the middle unit.
  255. int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
  256. // Create the less-than branch.
  257. middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
  258. lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
  259. ++ltLength;
  260. // Continue for the greater-or-equal branch.
  261. start=i;
  262. length=length-length/2;
  263. }
  264. if(U_FAILURE(errorCode)) {
  265. return nullptr;
  266. }
  267. ListBranchNode *listNode=new ListBranchNode();
  268. if(listNode==nullptr) {
  269. errorCode=U_MEMORY_ALLOCATION_ERROR;
  270. return nullptr;
  271. }
  272. // For each unit, find its elements array start and whether it has a final value.
  273. int32_t unitNumber=0;
  274. do {
  275. int32_t i=start;
  276. char16_t unit=getElementUnit(i++, unitIndex);
  277. i=indexOfElementWithNextUnit(i, unitIndex, unit);
  278. if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
  279. listNode->add(unit, getElementValue(start));
  280. } else {
  281. listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
  282. }
  283. start=i;
  284. } while(++unitNumber<length-1);
  285. // unitNumber==length-1, and the maxUnit elements range is [start..limit[
  286. char16_t unit=getElementUnit(start, unitIndex);
  287. if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
  288. listNode->add(unit, getElementValue(start));
  289. } else {
  290. listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
  291. }
  292. Node *node=registerNode(listNode, errorCode);
  293. // Create the split-branch nodes.
  294. while(ltLength>0) {
  295. --ltLength;
  296. node=registerNode(
  297. new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
  298. }
  299. return node;
  300. }
  301. StringTrieBuilder::Node *
  302. StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
  303. if(U_FAILURE(errorCode)) {
  304. delete newNode;
  305. return nullptr;
  306. }
  307. if(newNode==nullptr) {
  308. errorCode=U_MEMORY_ALLOCATION_ERROR;
  309. return nullptr;
  310. }
  311. const UHashElement *old=uhash_find(nodes, newNode);
  312. if(old!=nullptr) {
  313. delete newNode;
  314. return static_cast<Node*>(old->key.pointer);
  315. }
  316. // If uhash_puti() returns a non-zero value from an equivalent, previously
  317. // registered node, then uhash_find() failed to find that and we will leak newNode.
  318. #if U_DEBUG
  319. int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
  320. #endif
  321. uhash_puti(nodes, newNode, 1, &errorCode);
  322. U_ASSERT(oldValue==0);
  323. if(U_FAILURE(errorCode)) {
  324. delete newNode;
  325. return nullptr;
  326. }
  327. return newNode;
  328. }
  329. StringTrieBuilder::Node *
  330. StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
  331. if(U_FAILURE(errorCode)) {
  332. return nullptr;
  333. }
  334. FinalValueNode key(value);
  335. const UHashElement *old=uhash_find(nodes, &key);
  336. if(old!=nullptr) {
  337. return static_cast<Node*>(old->key.pointer);
  338. }
  339. Node *newNode=new FinalValueNode(value);
  340. if(newNode==nullptr) {
  341. errorCode=U_MEMORY_ALLOCATION_ERROR;
  342. return nullptr;
  343. }
  344. // If uhash_puti() returns a non-zero value from an equivalent, previously
  345. // registered node, then uhash_find() failed to find that and we will leak newNode.
  346. #if U_DEBUG
  347. int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
  348. #endif
  349. uhash_puti(nodes, newNode, 1, &errorCode);
  350. U_ASSERT(oldValue==0);
  351. if(U_FAILURE(errorCode)) {
  352. delete newNode;
  353. return nullptr;
  354. }
  355. return newNode;
  356. }
  357. int32_t
  358. StringTrieBuilder::hashNode(const void *node) {
  359. return static_cast<const Node*>(node)->hashCode();
  360. }
  361. UBool
  362. StringTrieBuilder::equalNodes(const void *left, const void *right) {
  363. return *static_cast<const Node*>(left) == *static_cast<const Node*>(right);
  364. }
  365. bool
  366. StringTrieBuilder::Node::operator==(const Node &other) const {
  367. return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
  368. }
  369. int32_t
  370. StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
  371. if(offset==0) {
  372. offset=edgeNumber;
  373. }
  374. return edgeNumber;
  375. }
  376. bool
  377. StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
  378. if(this==&other) {
  379. return true;
  380. }
  381. if(!Node::operator==(other)) {
  382. return false;
  383. }
  384. const FinalValueNode &o=static_cast<const FinalValueNode &>(other);
  385. return value==o.value;
  386. }
  387. void
  388. StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
  389. offset=builder.writeValueAndFinal(value, true);
  390. }
  391. bool
  392. StringTrieBuilder::ValueNode::operator==(const Node &other) const {
  393. if(this==&other) {
  394. return true;
  395. }
  396. if(!Node::operator==(other)) {
  397. return false;
  398. }
  399. const ValueNode &o=static_cast<const ValueNode &>(other);
  400. return hasValue==o.hasValue && (!hasValue || value==o.value);
  401. }
  402. bool
  403. StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
  404. if(this==&other) {
  405. return true;
  406. }
  407. if(!ValueNode::operator==(other)) {
  408. return false;
  409. }
  410. const IntermediateValueNode &o=static_cast<const IntermediateValueNode &>(other);
  411. return next==o.next;
  412. }
  413. int32_t
  414. StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
  415. if(offset==0) {
  416. offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
  417. }
  418. return edgeNumber;
  419. }
  420. void
  421. StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
  422. next->write(builder);
  423. offset=builder.writeValueAndFinal(value, false);
  424. }
  425. bool
  426. StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
  427. if(this==&other) {
  428. return true;
  429. }
  430. if(!ValueNode::operator==(other)) {
  431. return false;
  432. }
  433. const LinearMatchNode &o=static_cast<const LinearMatchNode &>(other);
  434. return length==o.length && next==o.next;
  435. }
  436. int32_t
  437. StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
  438. if(offset==0) {
  439. offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
  440. }
  441. return edgeNumber;
  442. }
  443. bool
  444. StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
  445. if(this==&other) {
  446. return true;
  447. }
  448. if(!Node::operator==(other)) {
  449. return false;
  450. }
  451. const ListBranchNode &o=static_cast<const ListBranchNode &>(other);
  452. for(int32_t i=0; i<length; ++i) {
  453. if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
  454. return false;
  455. }
  456. }
  457. return true;
  458. }
  459. int32_t
  460. StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
  461. if(offset==0) {
  462. firstEdgeNumber=edgeNumber;
  463. int32_t step=0;
  464. int32_t i=length;
  465. do {
  466. Node *edge=equal[--i];
  467. if(edge!=nullptr) {
  468. edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
  469. }
  470. // For all but the rightmost edge, decrement the edge number.
  471. step=1;
  472. } while(i>0);
  473. offset=edgeNumber;
  474. }
  475. return edgeNumber;
  476. }
  477. void
  478. StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
  479. // Write the sub-nodes in reverse order: The jump lengths are deltas from
  480. // after their own positions, so if we wrote the minUnit sub-node first,
  481. // then its jump delta would be larger.
  482. // Instead we write the minUnit sub-node last, for a shorter delta.
  483. int32_t unitNumber=length-1;
  484. Node *rightEdge=equal[unitNumber];
  485. int32_t rightEdgeNumber= rightEdge==nullptr ? firstEdgeNumber : rightEdge->getOffset();
  486. do {
  487. --unitNumber;
  488. if(equal[unitNumber]!=nullptr) {
  489. equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
  490. }
  491. } while(unitNumber>0);
  492. // The maxUnit sub-node is written as the very last one because we do
  493. // not jump for it at all.
  494. unitNumber=length-1;
  495. if(rightEdge==nullptr) {
  496. builder.writeValueAndFinal(values[unitNumber], true);
  497. } else {
  498. rightEdge->write(builder);
  499. }
  500. offset=builder.write(units[unitNumber]);
  501. // Write the rest of this node's unit-value pairs.
  502. while(--unitNumber>=0) {
  503. int32_t value;
  504. UBool isFinal;
  505. if(equal[unitNumber]==nullptr) {
  506. // Write the final value for the one string ending with this unit.
  507. value=values[unitNumber];
  508. isFinal=true;
  509. } else {
  510. // Write the delta to the start position of the sub-node.
  511. U_ASSERT(equal[unitNumber]->getOffset()>0);
  512. value=offset-equal[unitNumber]->getOffset();
  513. isFinal=false;
  514. }
  515. builder.writeValueAndFinal(value, isFinal);
  516. offset=builder.write(units[unitNumber]);
  517. }
  518. }
  519. bool
  520. StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
  521. if(this==&other) {
  522. return true;
  523. }
  524. if(!Node::operator==(other)) {
  525. return false;
  526. }
  527. const SplitBranchNode &o=static_cast<const SplitBranchNode &>(other);
  528. return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
  529. }
  530. int32_t
  531. StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
  532. if(offset==0) {
  533. firstEdgeNumber=edgeNumber;
  534. edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
  535. offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
  536. }
  537. return edgeNumber;
  538. }
  539. void
  540. StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
  541. // Encode the less-than branch first.
  542. lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
  543. // Encode the greater-or-equal branch last because we do not jump for it at all.
  544. greaterOrEqual->write(builder);
  545. // Write this node.
  546. U_ASSERT(lessThan->getOffset()>0);
  547. builder.writeDeltaTo(lessThan->getOffset()); // less-than
  548. offset=builder.write(unit);
  549. }
  550. bool
  551. StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
  552. if(this==&other) {
  553. return true;
  554. }
  555. if(!ValueNode::operator==(other)) {
  556. return false;
  557. }
  558. const BranchHeadNode &o=static_cast<const BranchHeadNode &>(other);
  559. return length==o.length && next==o.next;
  560. }
  561. int32_t
  562. StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
  563. if(offset==0) {
  564. offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
  565. }
  566. return edgeNumber;
  567. }
  568. void
  569. StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
  570. next->write(builder);
  571. if(length<=builder.getMinLinearMatch()) {
  572. offset=builder.writeValueAndType(hasValue, value, length-1);
  573. } else {
  574. builder.write(length-1);
  575. offset=builder.writeValueAndType(hasValue, value, 0);
  576. }
  577. }
  578. U_NAMESPACE_END