rbbitblb.cpp 64 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (c) 2002-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. //
  10. // rbbitblb.cpp
  11. //
  12. #include "unicode/utypes.h"
  13. #if !UCONFIG_NO_BREAK_ITERATION
  14. #include "unicode/unistr.h"
  15. #include "rbbitblb.h"
  16. #include "rbbirb.h"
  17. #include "rbbiscan.h"
  18. #include "rbbisetb.h"
  19. #include "rbbidata.h"
  20. #include "cstring.h"
  21. #include "uassert.h"
  22. #include "uvectr32.h"
  23. #include "cmemory.h"
  24. U_NAMESPACE_BEGIN
  25. const int32_t kMaxStateFor8BitsTable = 255;
  26. RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
  27. fRB(rb),
  28. fTree(*rootNode),
  29. fStatus(&status),
  30. fDStates(nullptr),
  31. fSafeTable(nullptr) {
  32. if (U_FAILURE(status)) {
  33. return;
  34. }
  35. // fDStates is UVector<RBBIStateDescriptor *>
  36. fDStates = new UVector(status);
  37. if (U_SUCCESS(status) && fDStates == nullptr ) {
  38. status = U_MEMORY_ALLOCATION_ERROR;
  39. }
  40. }
  41. RBBITableBuilder::~RBBITableBuilder() {
  42. int i;
  43. for (i=0; i<fDStates->size(); i++) {
  44. delete static_cast<RBBIStateDescriptor*>(fDStates->elementAt(i));
  45. }
  46. delete fDStates;
  47. delete fSafeTable;
  48. delete fLookAheadRuleMap;
  49. }
  50. //-----------------------------------------------------------------------------
  51. //
  52. // RBBITableBuilder::buildForwardTable - This is the main function for building
  53. // the DFA state transition table from the RBBI rules parse tree.
  54. //
  55. //-----------------------------------------------------------------------------
  56. void RBBITableBuilder::buildForwardTable() {
  57. if (U_FAILURE(*fStatus)) {
  58. return;
  59. }
  60. // If there were no rules, just return. This situation can easily arise
  61. // for the reverse rules.
  62. if (fTree==nullptr) {
  63. return;
  64. }
  65. //
  66. // Walk through the tree, replacing any references to $variables with a copy of the
  67. // parse tree for the substitution expression.
  68. //
  69. fTree = fTree->flattenVariables(*fStatus, 0);
  70. if (U_FAILURE(*fStatus)) {
  71. return;
  72. }
  73. #ifdef RBBI_DEBUG
  74. if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
  75. RBBIDebugPuts("\nParse tree after flattening variable references.");
  76. RBBINode::printTree(fTree, true);
  77. }
  78. #endif
  79. //
  80. // If the rules contained any references to {bof}
  81. // add a {bof} <cat> <former root of tree> to the
  82. // tree. Means that all matches must start out with the
  83. // {bof} fake character.
  84. //
  85. if (fRB->fSetBuilder->sawBOF()) {
  86. RBBINode *bofTop = new RBBINode(RBBINode::opCat);
  87. RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar);
  88. // Delete and exit if memory allocation failed.
  89. if (bofTop == nullptr || bofLeaf == nullptr) {
  90. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  91. delete bofTop;
  92. delete bofLeaf;
  93. return;
  94. }
  95. bofTop->fLeftChild = bofLeaf;
  96. bofTop->fRightChild = fTree;
  97. bofLeaf->fParent = bofTop;
  98. bofLeaf->fVal = 2; // Reserved value for {bof}.
  99. fTree = bofTop;
  100. }
  101. //
  102. // Add a unique right-end marker to the expression.
  103. // Appears as a cat-node, left child being the original tree,
  104. // right child being the end marker.
  105. //
  106. RBBINode *cn = new RBBINode(RBBINode::opCat);
  107. // Exit if memory allocation failed.
  108. if (cn == nullptr) {
  109. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  110. return;
  111. }
  112. cn->fLeftChild = fTree;
  113. fTree->fParent = cn;
  114. RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark);
  115. // Delete and exit if memory allocation failed.
  116. if (cn->fRightChild == nullptr) {
  117. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  118. delete cn;
  119. return;
  120. }
  121. cn->fRightChild->fParent = cn;
  122. fTree = cn;
  123. //
  124. // Replace all references to UnicodeSets with the tree for the equivalent
  125. // expression.
  126. //
  127. fTree->flattenSets();
  128. #ifdef RBBI_DEBUG
  129. if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
  130. RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
  131. RBBINode::printTree(fTree, true);
  132. }
  133. #endif
  134. //
  135. // calculate the functions nullable, firstpos, lastpos and followpos on
  136. // nodes in the parse tree.
  137. // See the algorithm description in Aho.
  138. // Understanding how this works by looking at the code alone will be
  139. // nearly impossible.
  140. //
  141. calcNullable(fTree);
  142. calcFirstPos(fTree);
  143. calcLastPos(fTree);
  144. calcFollowPos(fTree);
  145. if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
  146. RBBIDebugPuts("\n");
  147. printPosSets(fTree);
  148. }
  149. //
  150. // For "chained" rules, modify the followPos sets
  151. //
  152. if (fRB->fChainRules) {
  153. calcChainedFollowPos(fTree, endMarkerNode);
  154. }
  155. //
  156. // BOF (start of input) test fixup.
  157. //
  158. if (fRB->fSetBuilder->sawBOF()) {
  159. bofFixup();
  160. }
  161. //
  162. // Build the DFA state transition tables.
  163. //
  164. buildStateTable();
  165. mapLookAheadRules();
  166. flagAcceptingStates();
  167. flagLookAheadStates();
  168. flagTaggedStates();
  169. //
  170. // Update the global table of rule status {tag} values
  171. // The rule builder has a global vector of status values that are common
  172. // for all tables. Merge the ones from this table into the global set.
  173. //
  174. mergeRuleStatusVals();
  175. }
  176. //-----------------------------------------------------------------------------
  177. //
  178. // calcNullable. Impossible to explain succinctly. See Aho, section 3.9
  179. //
  180. //-----------------------------------------------------------------------------
  181. void RBBITableBuilder::calcNullable(RBBINode *n) {
  182. if (n == nullptr) {
  183. return;
  184. }
  185. if (n->fType == RBBINode::setRef ||
  186. n->fType == RBBINode::endMark ) {
  187. // These are non-empty leaf node types.
  188. n->fNullable = false;
  189. return;
  190. }
  191. if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) {
  192. // Lookahead marker node. It's a leaf, so no recursion on children.
  193. // It's nullable because it does not match any literal text from the input stream.
  194. n->fNullable = true;
  195. return;
  196. }
  197. // The node is not a leaf.
  198. // Calculate nullable on its children.
  199. calcNullable(n->fLeftChild);
  200. calcNullable(n->fRightChild);
  201. // Apply functions from table 3.40 in Aho
  202. if (n->fType == RBBINode::opOr) {
  203. n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
  204. }
  205. else if (n->fType == RBBINode::opCat) {
  206. n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
  207. }
  208. else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) {
  209. n->fNullable = true;
  210. }
  211. else {
  212. n->fNullable = false;
  213. }
  214. }
  215. //-----------------------------------------------------------------------------
  216. //
  217. // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
  218. //
  219. //-----------------------------------------------------------------------------
  220. void RBBITableBuilder::calcFirstPos(RBBINode *n) {
  221. if (n == nullptr) {
  222. return;
  223. }
  224. if (n->fType == RBBINode::leafChar ||
  225. n->fType == RBBINode::endMark ||
  226. n->fType == RBBINode::lookAhead ||
  227. n->fType == RBBINode::tag) {
  228. // These are non-empty leaf node types.
  229. // Note: In order to maintain the sort invariant on the set,
  230. // this function should only be called on a node whose set is
  231. // empty to start with.
  232. n->fFirstPosSet->addElement(n, *fStatus);
  233. return;
  234. }
  235. // The node is not a leaf.
  236. // Calculate firstPos on its children.
  237. calcFirstPos(n->fLeftChild);
  238. calcFirstPos(n->fRightChild);
  239. // Apply functions from table 3.40 in Aho
  240. if (n->fType == RBBINode::opOr) {
  241. setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
  242. setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
  243. }
  244. else if (n->fType == RBBINode::opCat) {
  245. setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
  246. if (n->fLeftChild->fNullable) {
  247. setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
  248. }
  249. }
  250. else if (n->fType == RBBINode::opStar ||
  251. n->fType == RBBINode::opQuestion ||
  252. n->fType == RBBINode::opPlus) {
  253. setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
  254. }
  255. }
  256. //-----------------------------------------------------------------------------
  257. //
  258. // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
  259. //
  260. //-----------------------------------------------------------------------------
  261. void RBBITableBuilder::calcLastPos(RBBINode *n) {
  262. if (n == nullptr) {
  263. return;
  264. }
  265. if (n->fType == RBBINode::leafChar ||
  266. n->fType == RBBINode::endMark ||
  267. n->fType == RBBINode::lookAhead ||
  268. n->fType == RBBINode::tag) {
  269. // These are non-empty leaf node types.
  270. // Note: In order to maintain the sort invariant on the set,
  271. // this function should only be called on a node whose set is
  272. // empty to start with.
  273. n->fLastPosSet->addElement(n, *fStatus);
  274. return;
  275. }
  276. // The node is not a leaf.
  277. // Calculate lastPos on its children.
  278. calcLastPos(n->fLeftChild);
  279. calcLastPos(n->fRightChild);
  280. // Apply functions from table 3.40 in Aho
  281. if (n->fType == RBBINode::opOr) {
  282. setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
  283. setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
  284. }
  285. else if (n->fType == RBBINode::opCat) {
  286. setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
  287. if (n->fRightChild->fNullable) {
  288. setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
  289. }
  290. }
  291. else if (n->fType == RBBINode::opStar ||
  292. n->fType == RBBINode::opQuestion ||
  293. n->fType == RBBINode::opPlus) {
  294. setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
  295. }
  296. }
  297. //-----------------------------------------------------------------------------
  298. //
  299. // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
  300. //
  301. //-----------------------------------------------------------------------------
  302. void RBBITableBuilder::calcFollowPos(RBBINode *n) {
  303. if (n == nullptr ||
  304. n->fType == RBBINode::leafChar ||
  305. n->fType == RBBINode::endMark) {
  306. return;
  307. }
  308. calcFollowPos(n->fLeftChild);
  309. calcFollowPos(n->fRightChild);
  310. // Aho rule #1
  311. if (n->fType == RBBINode::opCat) {
  312. RBBINode *i; // is 'i' in Aho's description
  313. uint32_t ix;
  314. UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;
  315. for (ix = 0; ix < static_cast<uint32_t>(LastPosOfLeftChild->size()); ix++) {
  316. i = static_cast<RBBINode*>(LastPosOfLeftChild->elementAt(ix));
  317. setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);
  318. }
  319. }
  320. // Aho rule #2
  321. if (n->fType == RBBINode::opStar ||
  322. n->fType == RBBINode::opPlus) {
  323. RBBINode *i; // again, n and i are the names from Aho's description.
  324. uint32_t ix;
  325. for (ix = 0; ix < static_cast<uint32_t>(n->fLastPosSet->size()); ix++) {
  326. i = static_cast<RBBINode*>(n->fLastPosSet->elementAt(ix));
  327. setAdd(i->fFollowPos, n->fFirstPosSet);
  328. }
  329. }
  330. }
  331. //-----------------------------------------------------------------------------
  332. //
  333. // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged
  334. // as roots of a rule to a destination vector.
  335. //
  336. //-----------------------------------------------------------------------------
  337. void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
  338. if (node == nullptr || U_FAILURE(*fStatus)) {
  339. return;
  340. }
  341. U_ASSERT(!dest->hasDeleter());
  342. if (node->fRuleRoot) {
  343. dest->addElement(node, *fStatus);
  344. // Note: rules cannot nest. If we found a rule start node,
  345. // no child node can also be a start node.
  346. return;
  347. }
  348. addRuleRootNodes(dest, node->fLeftChild);
  349. addRuleRootNodes(dest, node->fRightChild);
  350. }
  351. //-----------------------------------------------------------------------------
  352. //
  353. // calcChainedFollowPos. Modify the previously calculated followPos sets
  354. // to implement rule chaining. NOT described by Aho
  355. //
  356. //-----------------------------------------------------------------------------
  357. void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
  358. UVector leafNodes(*fStatus);
  359. if (U_FAILURE(*fStatus)) {
  360. return;
  361. }
  362. // get a list all leaf nodes
  363. tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
  364. if (U_FAILURE(*fStatus)) {
  365. return;
  366. }
  367. // Collect all leaf nodes that can start matches for rules
  368. // with inbound chaining enabled, which is the union of the
  369. // firstPosition sets from each of the rule root nodes.
  370. UVector ruleRootNodes(*fStatus);
  371. addRuleRootNodes(&ruleRootNodes, tree);
  372. UVector matchStartNodes(*fStatus);
  373. for (int j=0; j<ruleRootNodes.size(); ++j) {
  374. RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
  375. if (node->fChainIn) {
  376. setAdd(&matchStartNodes, node->fFirstPosSet);
  377. }
  378. }
  379. if (U_FAILURE(*fStatus)) {
  380. return;
  381. }
  382. int32_t endNodeIx;
  383. int32_t startNodeIx;
  384. for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) {
  385. RBBINode* endNode = static_cast<RBBINode*>(leafNodes.elementAt(endNodeIx));
  386. // Identify leaf nodes that correspond to overall rule match positions.
  387. // These include the endMarkNode in their followPos sets.
  388. //
  389. // Note: do not consider other end marker nodes, those that are added to
  390. // look-ahead rules. These can't chain; a match immediately stops
  391. // further matching. This leaves exactly one end marker node, the one
  392. // at the end of the complete tree.
  393. if (!endNode->fFollowPos->contains(endMarkNode)) {
  394. continue;
  395. }
  396. // We've got a node that can end a match.
  397. // Now iterate over the nodes that can start a match, looking for ones
  398. // with the same char class as our ending node.
  399. RBBINode *startNode;
  400. for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
  401. startNode = static_cast<RBBINode*>(matchStartNodes.elementAt(startNodeIx));
  402. if (startNode->fType != RBBINode::leafChar) {
  403. continue;
  404. }
  405. if (endNode->fVal == startNode->fVal) {
  406. // The end val (character class) of one possible match is the
  407. // same as the start of another.
  408. // Add all nodes from the followPos of the start node to the
  409. // followPos set of the end node, which will have the effect of
  410. // letting matches transition from a match state at endNode
  411. // to the second char of a match starting with startNode.
  412. setAdd(endNode->fFollowPos, startNode->fFollowPos);
  413. }
  414. }
  415. }
  416. }
  417. //-----------------------------------------------------------------------------
  418. //
  419. // bofFixup. Fixup for state tables that include {bof} beginning of input testing.
  420. // Do an swizzle similar to chaining, modifying the followPos set of
  421. // the bofNode to include the followPos nodes from other {bot} nodes
  422. // scattered through the tree.
  423. //
  424. // This function has much in common with calcChainedFollowPos().
  425. //
  426. //-----------------------------------------------------------------------------
  427. void RBBITableBuilder::bofFixup() {
  428. if (U_FAILURE(*fStatus)) {
  429. return;
  430. }
  431. // The parse tree looks like this ...
  432. // fTree root ---> <cat>
  433. // / \ .
  434. // <cat> <#end node>
  435. // / \ .
  436. // <bofNode> rest
  437. // of tree
  438. //
  439. // We will be adding things to the followPos set of the <bofNode>
  440. //
  441. RBBINode *bofNode = fTree->fLeftChild->fLeftChild;
  442. U_ASSERT(bofNode->fType == RBBINode::leafChar);
  443. U_ASSERT(bofNode->fVal == 2);
  444. // Get all nodes that can be the start a match of the user-written rules
  445. // (excluding the fake bofNode)
  446. // We want the nodes that can start a match in the
  447. // part labeled "rest of tree"
  448. //
  449. UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;
  450. RBBINode *startNode;
  451. int startNodeIx;
  452. for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
  453. startNode = static_cast<RBBINode*>(matchStartNodes->elementAt(startNodeIx));
  454. if (startNode->fType != RBBINode::leafChar) {
  455. continue;
  456. }
  457. if (startNode->fVal == bofNode->fVal) {
  458. // We found a leaf node corresponding to a {bof} that was
  459. // explicitly written into a rule.
  460. // Add everything from the followPos set of this node to the
  461. // followPos set of the fake bofNode at the start of the tree.
  462. //
  463. setAdd(bofNode->fFollowPos, startNode->fFollowPos);
  464. }
  465. }
  466. }
  467. //-----------------------------------------------------------------------------
  468. //
  469. // buildStateTable() Determine the set of runtime DFA states and the
  470. // transition tables for these states, by the algorithm
  471. // of fig. 3.44 in Aho.
  472. //
  473. // Most of the comments are quotes of Aho's psuedo-code.
  474. //
  475. //-----------------------------------------------------------------------------
  476. void RBBITableBuilder::buildStateTable() {
  477. if (U_FAILURE(*fStatus)) {
  478. return;
  479. }
  480. RBBIStateDescriptor *failState;
  481. // Set it to nullptr to avoid uninitialized warning
  482. RBBIStateDescriptor *initialState = nullptr;
  483. //
  484. // Add a dummy state 0 - the stop state. Not from Aho.
  485. int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
  486. failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
  487. if (failState == nullptr) {
  488. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  489. goto ExitBuildSTdeleteall;
  490. }
  491. failState->fPositions = new UVector(*fStatus);
  492. if (failState->fPositions == nullptr) {
  493. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  494. }
  495. if (failState->fPositions == nullptr || U_FAILURE(*fStatus)) {
  496. goto ExitBuildSTdeleteall;
  497. }
  498. fDStates->addElement(failState, *fStatus);
  499. if (U_FAILURE(*fStatus)) {
  500. goto ExitBuildSTdeleteall;
  501. }
  502. // initially, the only unmarked state in Dstates is firstpos(root),
  503. // where toot is the root of the syntax tree for (r)#;
  504. initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
  505. if (initialState == nullptr) {
  506. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  507. }
  508. if (U_FAILURE(*fStatus)) {
  509. goto ExitBuildSTdeleteall;
  510. }
  511. initialState->fPositions = new UVector(*fStatus);
  512. if (initialState->fPositions == nullptr) {
  513. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  514. }
  515. if (U_FAILURE(*fStatus)) {
  516. goto ExitBuildSTdeleteall;
  517. }
  518. setAdd(initialState->fPositions, fTree->fFirstPosSet);
  519. fDStates->addElement(initialState, *fStatus);
  520. if (U_FAILURE(*fStatus)) {
  521. goto ExitBuildSTdeleteall;
  522. }
  523. // while there is an unmarked state T in Dstates do begin
  524. for (;;) {
  525. RBBIStateDescriptor *T = nullptr;
  526. int32_t tx;
  527. for (tx=1; tx<fDStates->size(); tx++) {
  528. RBBIStateDescriptor *temp;
  529. temp = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(tx));
  530. if (temp->fMarked == false) {
  531. T = temp;
  532. break;
  533. }
  534. }
  535. if (T == nullptr) {
  536. break;
  537. }
  538. // mark T;
  539. T->fMarked = true;
  540. // for each input symbol a do begin
  541. int32_t a;
  542. for (a = 1; a<=lastInputSymbol; a++) {
  543. // let U be the set of positions that are in followpos(p)
  544. // for some position p in T
  545. // such that the symbol at position p is a;
  546. UVector *U = nullptr;
  547. RBBINode *p;
  548. int32_t px;
  549. for (px=0; px<T->fPositions->size(); px++) {
  550. p = static_cast<RBBINode*>(T->fPositions->elementAt(px));
  551. if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) {
  552. if (U == nullptr) {
  553. U = new UVector(*fStatus);
  554. if (U == nullptr) {
  555. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  556. goto ExitBuildSTdeleteall;
  557. }
  558. }
  559. setAdd(U, p->fFollowPos);
  560. }
  561. }
  562. // if U is not empty and not in DStates then
  563. int32_t ux = 0;
  564. UBool UinDstates = false;
  565. if (U != nullptr) {
  566. U_ASSERT(U->size() > 0);
  567. int ix;
  568. for (ix=0; ix<fDStates->size(); ix++) {
  569. RBBIStateDescriptor *temp2;
  570. temp2 = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(ix));
  571. if (setEquals(U, temp2->fPositions)) {
  572. delete U;
  573. U = temp2->fPositions;
  574. ux = ix;
  575. UinDstates = true;
  576. break;
  577. }
  578. }
  579. // Add U as an unmarked state to Dstates
  580. if (!UinDstates)
  581. {
  582. RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
  583. if (newState == nullptr) {
  584. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  585. }
  586. if (U_FAILURE(*fStatus)) {
  587. goto ExitBuildSTdeleteall;
  588. }
  589. newState->fPositions = U;
  590. fDStates->addElement(newState, *fStatus);
  591. if (U_FAILURE(*fStatus)) {
  592. return;
  593. }
  594. ux = fDStates->size()-1;
  595. }
  596. // Dtran[T, a] := U;
  597. T->fDtran->setElementAt(ux, a);
  598. }
  599. }
  600. }
  601. return;
  602. // delete local pointers only if error occurred.
  603. ExitBuildSTdeleteall:
  604. delete initialState;
  605. delete failState;
  606. }
  607. /**
  608. * mapLookAheadRules
  609. *
  610. */
  611. void RBBITableBuilder::mapLookAheadRules() {
  612. fLookAheadRuleMap = new UVector32(fRB->fScanner->numRules() + 1, *fStatus);
  613. if (fLookAheadRuleMap == nullptr) {
  614. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  615. }
  616. if (U_FAILURE(*fStatus)) {
  617. return;
  618. }
  619. fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
  620. for (int32_t n=0; n<fDStates->size(); n++) {
  621. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
  622. int32_t laSlotForState = 0;
  623. // Establish the look-ahead slot for this state, if the state covers
  624. // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
  625. // If any of the look-ahead nodes already have a slot assigned, use it,
  626. // otherwise assign a new one.
  627. bool sawLookAheadNode = false;
  628. for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
  629. RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
  630. if (node->fType != RBBINode::NodeType::lookAhead) {
  631. continue;
  632. }
  633. sawLookAheadNode = true;
  634. int32_t ruleNum = node->fVal; // Set when rule was originally parsed.
  635. U_ASSERT(ruleNum < fLookAheadRuleMap->size());
  636. U_ASSERT(ruleNum > 0);
  637. int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);
  638. if (laSlot != 0) {
  639. if (laSlotForState == 0) {
  640. laSlotForState = laSlot;
  641. } else {
  642. // TODO: figure out if this can fail, change to setting an error code if so.
  643. U_ASSERT(laSlot == laSlotForState);
  644. }
  645. }
  646. }
  647. if (!sawLookAheadNode) {
  648. continue;
  649. }
  650. if (laSlotForState == 0) {
  651. laSlotForState = ++fLASlotsInUse;
  652. }
  653. // For each look ahead node covered by this state,
  654. // set the mapping from the node's rule number to the look ahead slot.
  655. // There can be multiple nodes/rule numbers going to the same la slot.
  656. for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
  657. RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
  658. if (node->fType != RBBINode::NodeType::lookAhead) {
  659. continue;
  660. }
  661. int32_t ruleNum = node->fVal; // Set when rule was originally parsed.
  662. int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);
  663. (void)existingVal;
  664. U_ASSERT(existingVal == 0 || existingVal == laSlotForState);
  665. fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);
  666. }
  667. }
  668. }
  669. //-----------------------------------------------------------------------------
  670. //
  671. // flagAcceptingStates Identify accepting states.
  672. // First get a list of all of the end marker nodes.
  673. // Then, for each state s,
  674. // if s contains one of the end marker nodes in its list of tree positions then
  675. // s is an accepting state.
  676. //
  677. //-----------------------------------------------------------------------------
  678. void RBBITableBuilder::flagAcceptingStates() {
  679. if (U_FAILURE(*fStatus)) {
  680. return;
  681. }
  682. UVector endMarkerNodes(*fStatus);
  683. RBBINode *endMarker;
  684. int32_t i;
  685. int32_t n;
  686. if (U_FAILURE(*fStatus)) {
  687. return;
  688. }
  689. fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
  690. if (U_FAILURE(*fStatus)) {
  691. return;
  692. }
  693. for (i=0; i<endMarkerNodes.size(); i++) {
  694. endMarker = static_cast<RBBINode*>(endMarkerNodes.elementAt(i));
  695. for (n=0; n<fDStates->size(); n++) {
  696. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
  697. if (sd->fPositions->indexOf(endMarker) >= 0) {
  698. // Any non-zero value for fAccepting means this is an accepting node.
  699. // The value is what will be returned to the user as the break status.
  700. // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
  701. if (sd->fAccepting==0) {
  702. // State hasn't been marked as accepting yet. Do it now.
  703. sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
  704. if (sd->fAccepting == 0) {
  705. sd->fAccepting = ACCEPTING_UNCONDITIONAL;
  706. }
  707. }
  708. if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
  709. // Both lookahead and non-lookahead accepting for this state.
  710. // Favor the look-ahead, because a look-ahead match needs to
  711. // immediately stop the run-time engine. First match, not longest.
  712. sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
  713. }
  714. // implicit else:
  715. // if sd->fAccepting already had a value other than 0 or 1, leave it be.
  716. }
  717. }
  718. }
  719. }
  720. //-----------------------------------------------------------------------------
  721. //
  722. // flagLookAheadStates Very similar to flagAcceptingStates, above.
  723. //
  724. //-----------------------------------------------------------------------------
  725. void RBBITableBuilder::flagLookAheadStates() {
  726. if (U_FAILURE(*fStatus)) {
  727. return;
  728. }
  729. UVector lookAheadNodes(*fStatus);
  730. RBBINode *lookAheadNode;
  731. int32_t i;
  732. int32_t n;
  733. fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
  734. if (U_FAILURE(*fStatus)) {
  735. return;
  736. }
  737. for (i=0; i<lookAheadNodes.size(); i++) {
  738. lookAheadNode = static_cast<RBBINode*>(lookAheadNodes.elementAt(i));
  739. U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
  740. for (n=0; n<fDStates->size(); n++) {
  741. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
  742. int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
  743. if (positionsIdx >= 0) {
  744. U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
  745. uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
  746. U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
  747. // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
  748. // printf("%s:%d Bingo. sd->fLookAhead:%d lookaheadSlot:%d\n",
  749. // __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);
  750. // }
  751. sd->fLookAhead = lookaheadSlot;
  752. }
  753. }
  754. }
  755. }
  756. //-----------------------------------------------------------------------------
  757. //
  758. // flagTaggedStates
  759. //
  760. //-----------------------------------------------------------------------------
  761. void RBBITableBuilder::flagTaggedStates() {
  762. if (U_FAILURE(*fStatus)) {
  763. return;
  764. }
  765. UVector tagNodes(*fStatus);
  766. RBBINode *tagNode;
  767. int32_t i;
  768. int32_t n;
  769. if (U_FAILURE(*fStatus)) {
  770. return;
  771. }
  772. fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
  773. if (U_FAILURE(*fStatus)) {
  774. return;
  775. }
  776. for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em)
  777. tagNode = static_cast<RBBINode*>(tagNodes.elementAt(i));
  778. for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table)
  779. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
  780. if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t
  781. sortedAdd(&sd->fTagVals, tagNode->fVal);
  782. }
  783. }
  784. }
  785. }
  786. //-----------------------------------------------------------------------------
  787. //
  788. // mergeRuleStatusVals
  789. //
  790. // Update the global table of rule status {tag} values
  791. // The rule builder has a global vector of status values that are common
  792. // for all tables. Merge the ones from this table into the global set.
  793. //
  794. //-----------------------------------------------------------------------------
  795. void RBBITableBuilder::mergeRuleStatusVals() {
  796. //
  797. // The basic outline of what happens here is this...
  798. //
  799. // for each state in this state table
  800. // if the status tag list for this state is in the global statuses list
  801. // record where and
  802. // continue with the next state
  803. // else
  804. // add the tag list for this state to the global list.
  805. //
  806. int i;
  807. int n;
  808. // Pre-set a single tag of {0} into the table.
  809. // We will need this as a default, for rule sets with no explicit tagging.
  810. if (fRB->fRuleStatusVals->size() == 0) {
  811. fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group
  812. fRB->fRuleStatusVals->addElement(static_cast<int32_t>(0), *fStatus); // and our single status of zero
  813. }
  814. // For each state
  815. for (n=0; n<fDStates->size(); n++) {
  816. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
  817. UVector *thisStatesTagValues = sd->fTagVals;
  818. if (thisStatesTagValues == nullptr) {
  819. // No tag values are explicitly associated with this state.
  820. // Set the default tag value.
  821. sd->fTagsIdx = 0;
  822. continue;
  823. }
  824. // There are tag(s) associated with this state.
  825. // fTagsIdx will be the index into the global tag list for this state's tag values.
  826. // Initial value of -1 flags that we haven't got it set yet.
  827. sd->fTagsIdx = -1;
  828. int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list
  829. int32_t nextTagGroupStart = 0;
  830. // Loop runs once per group of tags in the global list
  831. while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
  832. thisTagGroupStart = nextTagGroupStart;
  833. nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
  834. if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
  835. // The number of tags for this state is different from
  836. // the number of tags in this group from the global list.
  837. // Continue with the next group from the global list.
  838. continue;
  839. }
  840. // The lengths match, go ahead and compare the actual tag values
  841. // between this state and the group from the global list.
  842. for (i=0; i<thisStatesTagValues->size(); i++) {
  843. if (thisStatesTagValues->elementAti(i) !=
  844. fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
  845. // Mismatch.
  846. break;
  847. }
  848. }
  849. if (i == thisStatesTagValues->size()) {
  850. // We found a set of tag values in the global list that match
  851. // those for this state. Use them.
  852. sd->fTagsIdx = thisTagGroupStart;
  853. break;
  854. }
  855. }
  856. if (sd->fTagsIdx == -1) {
  857. // No suitable entry in the global tag list already. Add one
  858. sd->fTagsIdx = fRB->fRuleStatusVals->size();
  859. fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
  860. for (i=0; i<thisStatesTagValues->size(); i++) {
  861. fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
  862. }
  863. }
  864. }
  865. }
  866. //-----------------------------------------------------------------------------
  867. //
  868. // sortedAdd Add a value to a vector of sorted values (ints).
  869. // Do not replicate entries; if the value is already there, do not
  870. // add a second one.
  871. // Lazily create the vector if it does not already exist.
  872. //
  873. //-----------------------------------------------------------------------------
  874. void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
  875. int32_t i;
  876. if (*vector == nullptr) {
  877. *vector = new UVector(*fStatus);
  878. }
  879. if (*vector == nullptr || U_FAILURE(*fStatus)) {
  880. return;
  881. }
  882. UVector *vec = *vector;
  883. int32_t vSize = vec->size();
  884. for (i=0; i<vSize; i++) {
  885. int32_t valAtI = vec->elementAti(i);
  886. if (valAtI == val) {
  887. // The value is already in the vector. Don't add it again.
  888. return;
  889. }
  890. if (valAtI > val) {
  891. break;
  892. }
  893. }
  894. vec->insertElementAt(val, i, *fStatus);
  895. }
  896. //-----------------------------------------------------------------------------
  897. //
  898. // setAdd Set operation on UVector
  899. // dest = dest union source
  900. // Elements may only appear once and must be sorted.
  901. //
  902. //-----------------------------------------------------------------------------
  903. void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
  904. U_ASSERT(!dest->hasDeleter());
  905. U_ASSERT(!source->hasDeleter());
  906. int32_t destOriginalSize = dest->size();
  907. int32_t sourceSize = source->size();
  908. int32_t di = 0;
  909. MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases without malloc
  910. void **destPtr, **sourcePtr;
  911. void **destLim, **sourceLim;
  912. if (destOriginalSize > destArray.getCapacity()) {
  913. if (destArray.resize(destOriginalSize) == nullptr) {
  914. return;
  915. }
  916. }
  917. destPtr = destArray.getAlias();
  918. destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()?
  919. if (sourceSize > sourceArray.getCapacity()) {
  920. if (sourceArray.resize(sourceSize) == nullptr) {
  921. return;
  922. }
  923. }
  924. sourcePtr = sourceArray.getAlias();
  925. sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()?
  926. // Avoid multiple "get element" calls by getting the contents into arrays
  927. (void) dest->toArray(destPtr);
  928. (void) source->toArray(sourcePtr);
  929. dest->setSize(sourceSize+destOriginalSize, *fStatus);
  930. if (U_FAILURE(*fStatus)) {
  931. return;
  932. }
  933. while (sourcePtr < sourceLim && destPtr < destLim) {
  934. if (*destPtr == *sourcePtr) {
  935. dest->setElementAt(*sourcePtr++, di++);
  936. destPtr++;
  937. }
  938. // This check is required for machines with segmented memory, like i5/OS.
  939. // Direct pointer comparison is not recommended.
  940. else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
  941. dest->setElementAt(*destPtr++, di++);
  942. }
  943. else { /* *sourcePtr < *destPtr */
  944. dest->setElementAt(*sourcePtr++, di++);
  945. }
  946. }
  947. // At most one of these two cleanup loops will execute
  948. while (destPtr < destLim) {
  949. dest->setElementAt(*destPtr++, di++);
  950. }
  951. while (sourcePtr < sourceLim) {
  952. dest->setElementAt(*sourcePtr++, di++);
  953. }
  954. dest->setSize(di, *fStatus);
  955. }
  956. //-----------------------------------------------------------------------------
  957. //
  958. // setEqual Set operation on UVector.
  959. // Compare for equality.
  960. // Elements must be sorted.
  961. //
  962. //-----------------------------------------------------------------------------
  963. UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
  964. return a->equals(*b);
  965. }
  966. //-----------------------------------------------------------------------------
  967. //
  968. // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
  969. // for each node in the tree.
  970. //
  971. //-----------------------------------------------------------------------------
  972. #ifdef RBBI_DEBUG
  973. void RBBITableBuilder::printPosSets(RBBINode *n) {
  974. if (n==nullptr) {
  975. return;
  976. }
  977. printf("\n");
  978. RBBINode::printNodeHeader();
  979. RBBINode::printNode(n);
  980. RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"true":"false");
  981. RBBIDebugPrintf(" firstpos: ");
  982. printSet(n->fFirstPosSet);
  983. RBBIDebugPrintf(" lastpos: ");
  984. printSet(n->fLastPosSet);
  985. RBBIDebugPrintf(" followpos: ");
  986. printSet(n->fFollowPos);
  987. printPosSets(n->fLeftChild);
  988. printPosSets(n->fRightChild);
  989. }
  990. #endif
  991. //
  992. // findDuplCharClassFrom()
  993. //
  994. bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
  995. int32_t numStates = fDStates->size();
  996. int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
  997. for (; categories->first < numCols-1; categories->first++) {
  998. // Note: dictionary & non-dictionary columns cannot be merged.
  999. // The limitSecond value prevents considering mixed pairs.
  1000. // Dictionary categories are >= DictCategoriesStart.
  1001. // Non dict categories are < DictCategoriesStart.
  1002. int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ?
  1003. fRB->fSetBuilder->getDictCategoriesStart() : numCols;
  1004. for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) {
  1005. // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
  1006. uint16_t table_base = 0;
  1007. uint16_t table_dupl = 1;
  1008. for (int32_t state=0; state<numStates; state++) {
  1009. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
  1010. table_base = static_cast<uint16_t>(sd->fDtran->elementAti(categories->first));
  1011. table_dupl = static_cast<uint16_t>(sd->fDtran->elementAti(categories->second));
  1012. if (table_base != table_dupl) {
  1013. break;
  1014. }
  1015. }
  1016. if (table_base == table_dupl) {
  1017. return true;
  1018. }
  1019. }
  1020. }
  1021. return false;
  1022. }
  1023. //
  1024. // removeColumn()
  1025. //
  1026. void RBBITableBuilder::removeColumn(int32_t column) {
  1027. int32_t numStates = fDStates->size();
  1028. for (int32_t state=0; state<numStates; state++) {
  1029. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
  1030. U_ASSERT(column < sd->fDtran->size());
  1031. sd->fDtran->removeElementAt(column);
  1032. }
  1033. }
  1034. /*
  1035. * findDuplicateState
  1036. */
  1037. bool RBBITableBuilder::findDuplicateState(IntPair *states) {
  1038. int32_t numStates = fDStates->size();
  1039. int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
  1040. for (; states->first<numStates-1; states->first++) {
  1041. RBBIStateDescriptor* firstSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->first));
  1042. for (states->second=states->first+1; states->second<numStates; states->second++) {
  1043. RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->second));
  1044. if (firstSD->fAccepting != duplSD->fAccepting ||
  1045. firstSD->fLookAhead != duplSD->fLookAhead ||
  1046. firstSD->fTagsIdx != duplSD->fTagsIdx) {
  1047. continue;
  1048. }
  1049. bool rowsMatch = true;
  1050. for (int32_t col=0; col < numCols; ++col) {
  1051. int32_t firstVal = firstSD->fDtran->elementAti(col);
  1052. int32_t duplVal = duplSD->fDtran->elementAti(col);
  1053. if (!((firstVal == duplVal) ||
  1054. ((firstVal == states->first || firstVal == states->second) &&
  1055. (duplVal == states->first || duplVal == states->second)))) {
  1056. rowsMatch = false;
  1057. break;
  1058. }
  1059. }
  1060. if (rowsMatch) {
  1061. return true;
  1062. }
  1063. }
  1064. }
  1065. return false;
  1066. }
  1067. bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
  1068. int32_t numStates = fSafeTable->size();
  1069. for (; states->first<numStates-1; states->first++) {
  1070. UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
  1071. for (states->second=states->first+1; states->second<numStates; states->second++) {
  1072. UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
  1073. bool rowsMatch = true;
  1074. int32_t numCols = firstRow->length();
  1075. for (int32_t col=0; col < numCols; ++col) {
  1076. int32_t firstVal = firstRow->charAt(col);
  1077. int32_t duplVal = duplRow->charAt(col);
  1078. if (!((firstVal == duplVal) ||
  1079. ((firstVal == states->first || firstVal == states->second) &&
  1080. (duplVal == states->first || duplVal == states->second)))) {
  1081. rowsMatch = false;
  1082. break;
  1083. }
  1084. }
  1085. if (rowsMatch) {
  1086. return true;
  1087. }
  1088. }
  1089. }
  1090. return false;
  1091. }
  1092. void RBBITableBuilder::removeState(IntPair duplStates) {
  1093. const int32_t keepState = duplStates.first;
  1094. const int32_t duplState = duplStates.second;
  1095. U_ASSERT(keepState < duplState);
  1096. U_ASSERT(duplState < fDStates->size());
  1097. RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(duplState));
  1098. fDStates->removeElementAt(duplState);
  1099. delete duplSD;
  1100. int32_t numStates = fDStates->size();
  1101. int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
  1102. for (int32_t state=0; state<numStates; ++state) {
  1103. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
  1104. for (int32_t col=0; col<numCols; col++) {
  1105. int32_t existingVal = sd->fDtran->elementAti(col);
  1106. int32_t newVal = existingVal;
  1107. if (existingVal == duplState) {
  1108. newVal = keepState;
  1109. } else if (existingVal > duplState) {
  1110. newVal = existingVal - 1;
  1111. }
  1112. sd->fDtran->setElementAt(newVal, col);
  1113. }
  1114. }
  1115. }
  1116. void RBBITableBuilder::removeSafeState(IntPair duplStates) {
  1117. const int32_t keepState = duplStates.first;
  1118. const int32_t duplState = duplStates.second;
  1119. U_ASSERT(keepState < duplState);
  1120. U_ASSERT(duplState < fSafeTable->size());
  1121. fSafeTable->removeElementAt(duplState); // Note that fSafeTable has a deleter function
  1122. // and will auto-delete the removed element.
  1123. int32_t numStates = fSafeTable->size();
  1124. for (int32_t state=0; state<numStates; ++state) {
  1125. UnicodeString* sd = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
  1126. int32_t numCols = sd->length();
  1127. for (int32_t col=0; col<numCols; col++) {
  1128. int32_t existingVal = sd->charAt(col);
  1129. int32_t newVal = existingVal;
  1130. if (existingVal == duplState) {
  1131. newVal = keepState;
  1132. } else if (existingVal > duplState) {
  1133. newVal = existingVal - 1;
  1134. }
  1135. sd->setCharAt(col, static_cast<char16_t>(newVal));
  1136. }
  1137. }
  1138. }
  1139. /*
  1140. * RemoveDuplicateStates
  1141. */
  1142. int32_t RBBITableBuilder::removeDuplicateStates() {
  1143. IntPair dupls = {3, 0};
  1144. int32_t numStatesRemoved = 0;
  1145. while (findDuplicateState(&dupls)) {
  1146. // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
  1147. removeState(dupls);
  1148. ++numStatesRemoved;
  1149. }
  1150. return numStatesRemoved;
  1151. }
  1152. //-----------------------------------------------------------------------------
  1153. //
  1154. // getTableSize() Calculate the size of the runtime form of this
  1155. // state transition table.
  1156. //
  1157. //-----------------------------------------------------------------------------
  1158. int32_t RBBITableBuilder::getTableSize() const {
  1159. int32_t size = 0;
  1160. int32_t numRows;
  1161. int32_t numCols;
  1162. int32_t rowSize;
  1163. if (fTree == nullptr) {
  1164. return 0;
  1165. }
  1166. size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
  1167. numRows = fDStates->size();
  1168. numCols = fRB->fSetBuilder->getNumCharCategories();
  1169. if (use8BitsForTable()) {
  1170. rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
  1171. } else {
  1172. rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
  1173. }
  1174. size += numRows * rowSize;
  1175. return size;
  1176. }
  1177. bool RBBITableBuilder::use8BitsForTable() const {
  1178. return fDStates->size() <= kMaxStateFor8BitsTable;
  1179. }
  1180. //-----------------------------------------------------------------------------
  1181. //
  1182. // exportTable() export the state transition table in the format required
  1183. // by the runtime engine. getTableSize() bytes of memory
  1184. // must be available at the output address "where".
  1185. //
  1186. //-----------------------------------------------------------------------------
  1187. void RBBITableBuilder::exportTable(void *where) {
  1188. RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
  1189. uint32_t state;
  1190. int col;
  1191. if (U_FAILURE(*fStatus) || fTree == nullptr) {
  1192. return;
  1193. }
  1194. int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
  1195. if (catCount > 0x7fff ||
  1196. fDStates->size() > 0x7fff) {
  1197. *fStatus = U_BRK_INTERNAL_ERROR;
  1198. return;
  1199. }
  1200. table->fNumStates = fDStates->size();
  1201. table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();
  1202. table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
  1203. table->fFlags = 0;
  1204. if (use8BitsForTable()) {
  1205. table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
  1206. table->fFlags |= RBBI_8BITS_ROWS;
  1207. } else {
  1208. table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
  1209. }
  1210. if (fRB->fLookAheadHardBreak) {
  1211. table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK;
  1212. }
  1213. if (fRB->fSetBuilder->sawBOF()) {
  1214. table->fFlags |= RBBI_BOF_REQUIRED;
  1215. }
  1216. for (state=0; state<table->fNumStates; state++) {
  1217. RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
  1218. RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
  1219. if (use8BitsForTable()) {
  1220. U_ASSERT (sd->fAccepting <= 255);
  1221. U_ASSERT (sd->fLookAhead <= 255);
  1222. U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
  1223. RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
  1224. r8->fAccepting = sd->fAccepting;
  1225. r8->fLookAhead = sd->fLookAhead;
  1226. r8->fTagsIdx = sd->fTagsIdx;
  1227. for (col=0; col<catCount; col++) {
  1228. U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
  1229. r8->fNextState[col] = sd->fDtran->elementAti(col);
  1230. }
  1231. } else {
  1232. U_ASSERT (sd->fAccepting <= 0xffff);
  1233. U_ASSERT (sd->fLookAhead <= 0xffff);
  1234. U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
  1235. row->r16.fAccepting = sd->fAccepting;
  1236. row->r16.fLookAhead = sd->fLookAhead;
  1237. row->r16.fTagsIdx = sd->fTagsIdx;
  1238. for (col=0; col<catCount; col++) {
  1239. row->r16.fNextState[col] = sd->fDtran->elementAti(col);
  1240. }
  1241. }
  1242. }
  1243. }
  1244. /**
  1245. * Synthesize a safe state table from the main state table.
  1246. */
  1247. void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
  1248. // The safe table creation has three steps:
  1249. // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
  1250. // following the pair do not depend on context or state before the pair. To test
  1251. // whether a pair is safe, run it through the main forward state table, starting
  1252. // from each state. If the the final state is the same, no matter what the starting state,
  1253. // the pair is safe.
  1254. //
  1255. // 2. Build a state table that recognizes the safe pairs. It's similar to their
  1256. // forward table, with a column for each input character [class], and a row for
  1257. // each state. Row 1 is the start state, and row 0 is the stop state. Initially
  1258. // create an additional state for each input character category; being in
  1259. // one of these states means that the character has been seen, and is potentially
  1260. // the first of a pair. In each of these rows, the entry for the second character
  1261. // of a safe pair is set to the stop state (0), indicating that a match was found.
  1262. // All other table entries are set to the state corresponding the current input
  1263. // character, allowing that character to be the of a start following pair.
  1264. //
  1265. // Because the safe rules are to be run in reverse, moving backwards in the text,
  1266. // the first and second pair categories are swapped when building the table.
  1267. //
  1268. // 3. Compress the table. There are typically many rows (states) that are
  1269. // equivalent - that have zeroes (match completed) in the same columns -
  1270. // and can be folded together.
  1271. // Each safe pair is stored as two UChars in the safePair string.
  1272. UnicodeString safePairs;
  1273. int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
  1274. int32_t numStates = fDStates->size();
  1275. for (int32_t c1=0; c1<numCharClasses; ++c1) {
  1276. for (int32_t c2=0; c2 < numCharClasses; ++c2) {
  1277. int32_t wantedEndState = -1;
  1278. int32_t endState = 0;
  1279. for (int32_t startState = 1; startState < numStates; ++startState) {
  1280. RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
  1281. int32_t s2 = startStateD->fDtran->elementAti(c1);
  1282. RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
  1283. endState = s2StateD->fDtran->elementAti(c2);
  1284. if (wantedEndState < 0) {
  1285. wantedEndState = endState;
  1286. } else {
  1287. if (wantedEndState != endState) {
  1288. break;
  1289. }
  1290. }
  1291. }
  1292. if (wantedEndState == endState) {
  1293. safePairs.append(static_cast<char16_t>(c1));
  1294. safePairs.append(static_cast<char16_t>(c2));
  1295. // printf("(%d, %d) ", c1, c2);
  1296. }
  1297. }
  1298. // printf("\n");
  1299. }
  1300. // Populate the initial safe table.
  1301. // The table as a whole is UVector<UnicodeString>
  1302. // Each row is represented by a UnicodeString, being used as a Vector<int16>.
  1303. // Row 0 is the stop state.
  1304. // Row 1 is the start state.
  1305. // Row 2 and beyond are other states, initially one per char class, but
  1306. // after initial construction, many of the states will be combined, compacting the table.
  1307. // The String holds the nextState data only. The four leading fields of a row, fAccepting,
  1308. // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
  1309. U_ASSERT(fSafeTable == nullptr);
  1310. LocalPointer<UVector> lpSafeTable(
  1311. new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status), status);
  1312. if (U_FAILURE(status)) {
  1313. return;
  1314. }
  1315. fSafeTable = lpSafeTable.orphan();
  1316. for (int32_t row=0; row<numCharClasses + 2; ++row) {
  1317. LocalPointer<UnicodeString> lpString(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
  1318. fSafeTable->adoptElement(lpString.orphan(), status);
  1319. }
  1320. if (U_FAILURE(status)) {
  1321. return;
  1322. }
  1323. // From the start state, each input char class transitions to the state for that input.
  1324. UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
  1325. for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
  1326. // Note: +2 for the start & stop state.
  1327. startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
  1328. }
  1329. // Initially make every other state table row look like the start state row,
  1330. for (int32_t row=2; row<numCharClasses+2; ++row) {
  1331. UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
  1332. rowState = startState; // UnicodeString assignment, copies contents.
  1333. }
  1334. // Run through the safe pairs, set the next state to zero when pair has been seen.
  1335. // Zero being the stop state, meaning we found a safe point.
  1336. for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
  1337. int32_t c1 = safePairs.charAt(pairIdx);
  1338. int32_t c2 = safePairs.charAt(pairIdx + 1);
  1339. UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
  1340. rowState.setCharAt(c1, 0);
  1341. }
  1342. // Remove duplicate or redundant rows from the table.
  1343. IntPair states = {1, 0};
  1344. while (findDuplicateSafeState(&states)) {
  1345. // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
  1346. removeSafeState(states);
  1347. }
  1348. }
  1349. //-----------------------------------------------------------------------------
  1350. //
  1351. // getSafeTableSize() Calculate the size of the runtime form of this
  1352. // safe state table.
  1353. //
  1354. //-----------------------------------------------------------------------------
  1355. int32_t RBBITableBuilder::getSafeTableSize() const {
  1356. int32_t size = 0;
  1357. int32_t numRows;
  1358. int32_t numCols;
  1359. int32_t rowSize;
  1360. if (fSafeTable == nullptr) {
  1361. return 0;
  1362. }
  1363. size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
  1364. numRows = fSafeTable->size();
  1365. numCols = fRB->fSetBuilder->getNumCharCategories();
  1366. if (use8BitsForSafeTable()) {
  1367. rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
  1368. } else {
  1369. rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
  1370. }
  1371. size += numRows * rowSize;
  1372. return size;
  1373. }
  1374. bool RBBITableBuilder::use8BitsForSafeTable() const {
  1375. return fSafeTable->size() <= kMaxStateFor8BitsTable;
  1376. }
  1377. //-----------------------------------------------------------------------------
  1378. //
  1379. // exportSafeTable() export the state transition table in the format required
  1380. // by the runtime engine. getTableSize() bytes of memory
  1381. // must be available at the output address "where".
  1382. //
  1383. //-----------------------------------------------------------------------------
  1384. void RBBITableBuilder::exportSafeTable(void *where) {
  1385. RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
  1386. uint32_t state;
  1387. int col;
  1388. if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
  1389. return;
  1390. }
  1391. int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
  1392. if (catCount > 0x7fff ||
  1393. fSafeTable->size() > 0x7fff) {
  1394. *fStatus = U_BRK_INTERNAL_ERROR;
  1395. return;
  1396. }
  1397. table->fNumStates = fSafeTable->size();
  1398. table->fFlags = 0;
  1399. if (use8BitsForSafeTable()) {
  1400. table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
  1401. table->fFlags |= RBBI_8BITS_ROWS;
  1402. } else {
  1403. table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
  1404. }
  1405. for (state=0; state<table->fNumStates; state++) {
  1406. UnicodeString* rowString = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
  1407. RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
  1408. if (use8BitsForSafeTable()) {
  1409. RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
  1410. r8->fAccepting = 0;
  1411. r8->fLookAhead = 0;
  1412. r8->fTagsIdx = 0;
  1413. for (col=0; col<catCount; col++) {
  1414. U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
  1415. r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));
  1416. }
  1417. } else {
  1418. row->r16.fAccepting = 0;
  1419. row->r16.fLookAhead = 0;
  1420. row->r16.fTagsIdx = 0;
  1421. for (col=0; col<catCount; col++) {
  1422. row->r16.fNextState[col] = rowString->charAt(col);
  1423. }
  1424. }
  1425. }
  1426. }
  1427. //-----------------------------------------------------------------------------
  1428. //
  1429. // printSet Debug function. Print the contents of a UVector
  1430. //
  1431. //-----------------------------------------------------------------------------
  1432. #ifdef RBBI_DEBUG
  1433. void RBBITableBuilder::printSet(UVector *s) {
  1434. int32_t i;
  1435. for (i=0; i<s->size(); i++) {
  1436. const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
  1437. RBBIDebugPrintf("%5d", v==nullptr? -1 : v->fSerialNum);
  1438. }
  1439. RBBIDebugPrintf("\n");
  1440. }
  1441. #endif
  1442. //-----------------------------------------------------------------------------
  1443. //
  1444. // printStates Debug Function. Dump the fully constructed state transition table.
  1445. //
  1446. //-----------------------------------------------------------------------------
  1447. #ifdef RBBI_DEBUG
  1448. void RBBITableBuilder::printStates() {
  1449. int c; // input "character"
  1450. int n; // state number
  1451. RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
  1452. RBBIDebugPrintf(" | Acc LA Tag");
  1453. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1454. RBBIDebugPrintf(" %3d", c);
  1455. }
  1456. RBBIDebugPrintf("\n");
  1457. RBBIDebugPrintf(" |---------------");
  1458. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1459. RBBIDebugPrintf("----");
  1460. }
  1461. RBBIDebugPrintf("\n");
  1462. for (n=0; n<fDStates->size(); n++) {
  1463. RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
  1464. RBBIDebugPrintf(" %3d | " , n);
  1465. RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
  1466. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1467. RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c));
  1468. }
  1469. RBBIDebugPrintf("\n");
  1470. }
  1471. RBBIDebugPrintf("\n\n");
  1472. }
  1473. #endif
  1474. //-----------------------------------------------------------------------------
  1475. //
  1476. // printSafeTable Debug Function. Dump the fully constructed safe table.
  1477. //
  1478. //-----------------------------------------------------------------------------
  1479. #ifdef RBBI_DEBUG
  1480. void RBBITableBuilder::printReverseTable() {
  1481. int c; // input "character"
  1482. int n; // state number
  1483. RBBIDebugPrintf(" Safe Reverse Table \n");
  1484. if (fSafeTable == nullptr) {
  1485. RBBIDebugPrintf(" --- nullptr ---\n");
  1486. return;
  1487. }
  1488. RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
  1489. RBBIDebugPrintf(" | Acc LA Tag");
  1490. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1491. RBBIDebugPrintf(" %2d", c);
  1492. }
  1493. RBBIDebugPrintf("\n");
  1494. RBBIDebugPrintf(" |---------------");
  1495. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1496. RBBIDebugPrintf("---");
  1497. }
  1498. RBBIDebugPrintf("\n");
  1499. for (n=0; n<fSafeTable->size(); n++) {
  1500. UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
  1501. RBBIDebugPrintf(" %3d | " , n);
  1502. RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags
  1503. for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
  1504. RBBIDebugPrintf(" %2d", rowString->charAt(c));
  1505. }
  1506. RBBIDebugPrintf("\n");
  1507. }
  1508. RBBIDebugPrintf("\n\n");
  1509. }
  1510. #endif
  1511. //-----------------------------------------------------------------------------
  1512. //
  1513. // printRuleStatusTable Debug Function. Dump the common rule status table
  1514. //
  1515. //-----------------------------------------------------------------------------
  1516. #ifdef RBBI_DEBUG
  1517. void RBBITableBuilder::printRuleStatusTable() {
  1518. int32_t thisRecord = 0;
  1519. int32_t nextRecord = 0;
  1520. int i;
  1521. UVector *tbl = fRB->fRuleStatusVals;
  1522. RBBIDebugPrintf("index | tags \n");
  1523. RBBIDebugPrintf("-------------------\n");
  1524. while (nextRecord < tbl->size()) {
  1525. thisRecord = nextRecord;
  1526. nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
  1527. RBBIDebugPrintf("%4d ", thisRecord);
  1528. for (i=thisRecord+1; i<nextRecord; i++) {
  1529. RBBIDebugPrintf(" %5d", tbl->elementAti(i));
  1530. }
  1531. RBBIDebugPrintf("\n");
  1532. }
  1533. RBBIDebugPrintf("\n\n");
  1534. }
  1535. #endif
  1536. //-----------------------------------------------------------------------------
  1537. //
  1538. // RBBIStateDescriptor Methods. This is a very struct-like class
  1539. // Most access is directly to the fields.
  1540. //
  1541. //-----------------------------------------------------------------------------
  1542. RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
  1543. fMarked = false;
  1544. fAccepting = 0;
  1545. fLookAhead = 0;
  1546. fTagsIdx = 0;
  1547. fTagVals = nullptr;
  1548. fPositions = nullptr;
  1549. fDtran = nullptr;
  1550. fDtran = new UVector32(lastInputSymbol+1, *fStatus);
  1551. if (U_FAILURE(*fStatus)) {
  1552. return;
  1553. }
  1554. if (fDtran == nullptr) {
  1555. *fStatus = U_MEMORY_ALLOCATION_ERROR;
  1556. return;
  1557. }
  1558. fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
  1559. // It is indexed by input symbols, and will
  1560. // hold the next state number for each
  1561. // symbol.
  1562. }
  1563. RBBIStateDescriptor::~RBBIStateDescriptor() {
  1564. delete fPositions;
  1565. delete fDtran;
  1566. delete fTagVals;
  1567. fPositions = nullptr;
  1568. fDtran = nullptr;
  1569. fTagVals = nullptr;
  1570. }
  1571. U_NAMESPACE_END
  1572. #endif /* #if !UCONFIG_NO_BREAK_ITERATION */