// rbbinode.cpp (12 KB)
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ***************************************************************************
  5. * Copyright (C) 2002-2016 International Business Machines Corporation *
  6. * and others. All rights reserved. *
  7. ***************************************************************************
  8. */
  9. //
  10. // File: rbbinode.cpp
  11. //
  12. // Implementation of class RBBINode, which represents a node in the
  13. // tree generated when parsing the Rules Based Break Iterator rules.
  14. //
  15. // This "Class" is actually closer to a struct.
  16. // Code using it is expected to directly access fields much of the time.
  17. //
  18. #include "unicode/utypes.h"
  19. #if !UCONFIG_NO_BREAK_ITERATION
  20. #include "unicode/unistr.h"
  21. #include "unicode/uniset.h"
  22. #include "unicode/uchar.h"
  23. #include "unicode/parsepos.h"
  24. #include "cstr.h"
  25. #include "uvector.h"
  26. #include "rbbirb.h"
  27. #include "rbbinode.h"
  28. #include "uassert.h"
  29. U_NAMESPACE_BEGIN
  #ifdef RBBI_DEBUG
  // Debug builds only: running count of the nodes constructed so far.
  // Both RBBINode constructors pre-increment it and store the result in
  // the new node's fSerialNum.
  static int gLastSerial{0};
  #endif
  33. //-------------------------------------------------------------------------
  34. //
  35. // Constructor. Just set the fields to reasonable default values.
  36. //
  37. //-------------------------------------------------------------------------
  38. RBBINode::RBBINode(NodeType t) : UMemory() {
  39. #ifdef RBBI_DEBUG
  40. fSerialNum = ++gLastSerial;
  41. #endif
  42. fType = t;
  43. fParent = nullptr;
  44. fLeftChild = nullptr;
  45. fRightChild = nullptr;
  46. fInputSet = nullptr;
  47. fFirstPos = 0;
  48. fLastPos = 0;
  49. fNullable = false;
  50. fLookAheadEnd = false;
  51. fRuleRoot = false;
  52. fChainIn = false;
  53. fVal = 0;
  54. fPrecedence = precZero;
  55. UErrorCode status = U_ZERO_ERROR;
  56. fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
  57. fLastPosSet = new UVector(status);
  58. fFollowPos = new UVector(status);
  59. if (t==opCat) {fPrecedence = precOpCat;}
  60. else if (t==opOr) {fPrecedence = precOpOr;}
  61. else if (t==opStart) {fPrecedence = precStart;}
  62. else if (t==opLParen) {fPrecedence = precLParen;}
  63. }
  64. RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
  65. #ifdef RBBI_DEBUG
  66. fSerialNum = ++gLastSerial;
  67. #endif
  68. fType = other.fType;
  69. fParent = nullptr;
  70. fLeftChild = nullptr;
  71. fRightChild = nullptr;
  72. fInputSet = other.fInputSet;
  73. fPrecedence = other.fPrecedence;
  74. fText = other.fText;
  75. fFirstPos = other.fFirstPos;
  76. fLastPos = other.fLastPos;
  77. fNullable = other.fNullable;
  78. fVal = other.fVal;
  79. fRuleRoot = false;
  80. fChainIn = other.fChainIn;
  81. UErrorCode status = U_ZERO_ERROR;
  82. fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
  83. fLastPosSet = new UVector(status);
  84. fFollowPos = new UVector(status);
  85. }
  86. //-------------------------------------------------------------------------
  87. //
  88. // Destructor. Deletes both this node AND any child nodes,
  89. // except in the case of variable reference nodes. For
  90. // these, the l. child points back to the definition, which
  91. // is common for all references to the variable, meaning
  92. // it can't be deleted here.
  93. //
  94. //-------------------------------------------------------------------------
  95. RBBINode::~RBBINode() {
  96. // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
  97. delete fInputSet;
  98. fInputSet = nullptr;
  99. switch (this->fType) {
  100. case varRef:
  101. case setRef:
  102. // for these node types, multiple instances point to the same "children"
  103. // Storage ownership of children handled elsewhere. Don't delete here.
  104. break;
  105. default:
  106. delete fLeftChild;
  107. fLeftChild = nullptr;
  108. delete fRightChild;
  109. fRightChild = nullptr;
  110. }
  111. delete fFirstPosSet;
  112. delete fLastPosSet;
  113. delete fFollowPos;
  114. }
  115. //-------------------------------------------------------------------------
  116. //
  117. // cloneTree Make a copy of the subtree rooted at this node.
  118. // Discard any variable references encountered along the way,
  119. // and replace with copies of the variable's definitions.
  120. // Used to replicate the expression underneath variable
  121. // references in preparation for generating the DFA tables.
  122. //
  123. //-------------------------------------------------------------------------
  124. RBBINode *RBBINode::cloneTree() {
  125. RBBINode *n;
  126. if (fType == RBBINode::varRef) {
  127. // If the current node is a variable reference, skip over it
  128. // and clone the definition of the variable instead.
  129. n = fLeftChild->cloneTree();
  130. } else if (fType == RBBINode::uset) {
  131. n = this;
  132. } else {
  133. n = new RBBINode(*this);
  134. // Check for null pointer.
  135. if (n != nullptr) {
  136. if (fLeftChild != nullptr) {
  137. n->fLeftChild = fLeftChild->cloneTree();
  138. n->fLeftChild->fParent = n;
  139. }
  140. if (fRightChild != nullptr) {
  141. n->fRightChild = fRightChild->cloneTree();
  142. n->fRightChild->fParent = n;
  143. }
  144. }
  145. }
  146. return n;
  147. }
  148. //-------------------------------------------------------------------------
  149. //
  150. // flattenVariables Walk a parse tree, replacing any variable
  151. // references with a copy of the variable's definition.
  152. // Aside from variables, the tree is not changed.
  153. //
  154. // Return the root of the tree. If the root was not a variable
  155. // reference, it remains unchanged - the root we started with
  156. // is the root we return. If, however, the root was a variable
  157. // reference, the root of the newly cloned replacement tree will
  158. // be returned, and the original tree deleted.
  159. //
  160. // This function works by recursively walking the tree
  161. // without doing anything until a variable reference is
  162. // found, then calling cloneTree() at that point. Any
  163. // nested references are handled by cloneTree(), not here.
  164. //
  165. //-------------------------------------------------------------------------
  166. RBBINode *RBBINode::flattenVariables() {
  167. if (fType == varRef) {
  168. RBBINode *retNode = fLeftChild->cloneTree();
  169. if (retNode != nullptr) {
  170. retNode->fRuleRoot = this->fRuleRoot;
  171. retNode->fChainIn = this->fChainIn;
  172. }
  173. delete this; // TODO: undefined behavior. Fix.
  174. return retNode;
  175. }
  176. if (fLeftChild != nullptr) {
  177. fLeftChild = fLeftChild->flattenVariables();
  178. fLeftChild->fParent = this;
  179. }
  180. if (fRightChild != nullptr) {
  181. fRightChild = fRightChild->flattenVariables();
  182. fRightChild->fParent = this;
  183. }
  184. return this;
  185. }
  186. //-------------------------------------------------------------------------
  187. //
  188. // flattenSets Walk the parse tree, replacing any nodes of type setRef
  189. // with a copy of the expression tree for the set. A set's
  190. // equivalent expression tree is precomputed and saved as
  191. // the left child of the uset node.
  192. //
  193. //-------------------------------------------------------------------------
  194. void RBBINode::flattenSets() {
  195. U_ASSERT(fType != setRef);
  196. if (fLeftChild != nullptr) {
  197. if (fLeftChild->fType==setRef) {
  198. RBBINode *setRefNode = fLeftChild;
  199. RBBINode *usetNode = setRefNode->fLeftChild;
  200. RBBINode *replTree = usetNode->fLeftChild;
  201. fLeftChild = replTree->cloneTree();
  202. fLeftChild->fParent = this;
  203. delete setRefNode;
  204. } else {
  205. fLeftChild->flattenSets();
  206. }
  207. }
  208. if (fRightChild != nullptr) {
  209. if (fRightChild->fType==setRef) {
  210. RBBINode *setRefNode = fRightChild;
  211. RBBINode *usetNode = setRefNode->fLeftChild;
  212. RBBINode *replTree = usetNode->fLeftChild;
  213. fRightChild = replTree->cloneTree();
  214. fRightChild->fParent = this;
  215. delete setRefNode;
  216. } else {
  217. fRightChild->flattenSets();
  218. }
  219. }
  220. }
  221. //-------------------------------------------------------------------------
  222. //
  223. // findNodes() Locate all the nodes of the specified type, starting
  224. // at the specified root.
  225. //
  226. //-------------------------------------------------------------------------
  227. void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
  228. /* test for buffer overflows */
  229. if (U_FAILURE(status)) {
  230. return;
  231. }
  232. U_ASSERT(!dest->hasDeleter());
  233. if (fType == kind) {
  234. dest->addElement(this, status);
  235. }
  236. if (fLeftChild != nullptr) {
  237. fLeftChild->findNodes(dest, kind, status);
  238. }
  239. if (fRightChild != nullptr) {
  240. fRightChild->findNodes(dest, kind, status);
  241. }
  242. }
  //-------------------------------------------------------------------------
  //
  //    print.         Print out a single node, for debugging.
  //                   Compiled only when RBBI_DEBUG is defined.
  //
  //-------------------------------------------------------------------------
  #ifdef RBBI_DEBUG
  // Serial number of a node for display purposes, or -1 for a null pointer.
  static int32_t serial(const RBBINode *node) {
      if (node == nullptr) {
          return -1;
      }
      return node->fSerialNum;
  }

  // Prints one line describing node: address, serial number, type name, the
  // rule-root ('R') and chain-in ('C') markers, the serial numbers of the left
  // child, right child and parent, then fFirstPos and fVal.  For a varRef
  // node the node's text follows.  A null node prints as just its pointer
  // value.  The line is always terminated with a newline.
  void RBBINode::printNode(const RBBINode *node) {
      // Indexed directly by node->fType.
      static const char * const nodeTypeNames[] = {
          "setRef",
          "uset",
          "varRef",
          "leafChar",
          "lookAhead",
          "tag",
          "endMark",
          "opStart",
          "opCat",
          "opOr",
          "opStar",
          "opPlus",
          "opQuestion",
          "opBreak",
          "opReverse",
          "opLParen"
      };

      if (node != nullptr) {
          const char *typeName  = nodeTypeNames[node->fType];
          const char  rootMark  = node->fRuleRoot ? 'R' : ' ';
          const char  chainMark = node->fChainIn ? 'C' : ' ';
          RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
                  (void *)node, node->fSerialNum, typeName,
                  rootMark, chainMark,
                  serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
                  node->fFirstPos, node->fVal);
          if (node->fType == varRef) {
              RBBI_DEBUG_printUnicodeString(node->fText);
          }
      } else {
          RBBIDebugPrintf("%10p", (void *)node);
      }
      RBBIDebugPrintf("\n");
  }
  #endif
  #ifdef RBBI_DEBUG
  // Debug helper: converts s through CStr and prints the result with the
  // "%*s" format, using minWidth as the field width.
  U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
      CStr converted(s);
      RBBIDebugPrintf("%*s", minWidth, converted());
  }
  #endif
  //-------------------------------------------------------------------------
  //
  //    print.         Print out the tree of nodes rooted at the given node.
  //                   Compiled only when RBBI_DEBUG is defined.
  //
  //-------------------------------------------------------------------------
  #ifdef RBBI_DEBUG
  // Prints the column headings that go with the lines printed by printNode().
  void RBBINode::printNodeHeader() {
      RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
  }

  // Prints node, then its left subtree, then its right subtree.
  //   node          root of the subtree to dump; may be null, in which case
  //                 only the single line from printNode() is produced.
  //   printHeading  when true, the column headings are printed first.  The
  //                 recursive calls always pass false.
  void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
      if (printHeading) {
          printNodeHeader();
      }
      printNode(node);

      // Nothing lies below a null node, and the definition hanging under a
      // variable reference is not dumped.
      if (node == nullptr || node->fType == varRef) {
          return;
      }

      const RBBINode *const children[] = {node->fLeftChild, node->fRightChild};
      for (const RBBINode *child : children) {
          if (child != nullptr) {
              printTree(child, false);
          }
      }
  }
  #endif
  319. U_NAMESPACE_END
  320. #endif /* #if !UCONFIG_NO_BREAK_ITERATION */