123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- ***************************************************************************
- * Copyright (C) 2002-2016 International Business Machines Corporation *
- * and others. All rights reserved. *
- ***************************************************************************
- */
- //
- // File: rbbinode.cpp
- //
- // Implementation of class RBBINode, which represents a node in the
- // tree generated when parsing the Rules Based Break Iterator rules.
- //
- // This "Class" is actually closer to a struct.
- // Code using it is expected to directly access fields much of the time.
- //
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_BREAK_ITERATION
- #include "unicode/unistr.h"
- #include "unicode/uniset.h"
- #include "unicode/uchar.h"
- #include "unicode/parsepos.h"
- #include "cstr.h"
- #include "uvector.h"
- #include "rbbirb.h"
- #include "rbbinode.h"
- #include "uassert.h"
- U_NAMESPACE_BEGIN
- #ifdef RBBI_DEBUG
- static int gLastSerial = 0;
- #endif
- //-------------------------------------------------------------------------
- //
- // Constructor. Just set the fields to reasonable default values.
- //
- //-------------------------------------------------------------------------
- RBBINode::RBBINode(NodeType t) : UMemory() {
- #ifdef RBBI_DEBUG
- fSerialNum = ++gLastSerial;
- #endif
- fType = t;
- fParent = nullptr;
- fLeftChild = nullptr;
- fRightChild = nullptr;
- fInputSet = nullptr;
- fFirstPos = 0;
- fLastPos = 0;
- fNullable = false;
- fLookAheadEnd = false;
- fRuleRoot = false;
- fChainIn = false;
- fVal = 0;
- fPrecedence = precZero;
- UErrorCode status = U_ZERO_ERROR;
- fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
- fLastPosSet = new UVector(status);
- fFollowPos = new UVector(status);
- if (t==opCat) {fPrecedence = precOpCat;}
- else if (t==opOr) {fPrecedence = precOpOr;}
- else if (t==opStart) {fPrecedence = precStart;}
- else if (t==opLParen) {fPrecedence = precLParen;}
- }
- RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
- #ifdef RBBI_DEBUG
- fSerialNum = ++gLastSerial;
- #endif
- fType = other.fType;
- fParent = nullptr;
- fLeftChild = nullptr;
- fRightChild = nullptr;
- fInputSet = other.fInputSet;
- fPrecedence = other.fPrecedence;
- fText = other.fText;
- fFirstPos = other.fFirstPos;
- fLastPos = other.fLastPos;
- fNullable = other.fNullable;
- fVal = other.fVal;
- fRuleRoot = false;
- fChainIn = other.fChainIn;
- UErrorCode status = U_ZERO_ERROR;
- fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
- fLastPosSet = new UVector(status);
- fFollowPos = new UVector(status);
- }
- //-------------------------------------------------------------------------
- //
- // Destructor. Deletes both this node AND any child nodes,
- // except in the case of variable reference nodes. For
- // these, the l. child points back to the definition, which
- // is common for all references to the variable, meaning
- // it can't be deleted here.
- //
- //-------------------------------------------------------------------------
- RBBINode::~RBBINode() {
- // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
- delete fInputSet;
- fInputSet = nullptr;
- switch (this->fType) {
- case varRef:
- case setRef:
- // for these node types, multiple instances point to the same "children"
- // Storage ownership of children handled elsewhere. Don't delete here.
- break;
- default:
- // Avoid using a recursive implementation because of stack overflow problems.
- // See bug ICU-22584.
- // delete fLeftChild;
- NRDeleteNode(fLeftChild);
- fLeftChild = nullptr;
- // delete fRightChild;
- NRDeleteNode(fRightChild);
- fRightChild = nullptr;
- }
- delete fFirstPosSet;
- delete fLastPosSet;
- delete fFollowPos;
- }
- /**
- * Non-recursive delete of a node + its children. Used from the node destructor
- * instead of the more obvious recursive implementation to avoid problems with
- * stack overflow with some perverse test rule data (from fuzzing).
- */
- void RBBINode::NRDeleteNode(RBBINode *node) {
- if (node == nullptr) {
- return;
- }
- RBBINode *stopNode = node->fParent;
- RBBINode *nextNode = node;
- while (nextNode != stopNode && nextNode != nullptr) {
- RBBINode *currentNode = nextNode;
- if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
- currentNode->fType == varRef || // varRef and setRef nodes do not
- currentNode->fType == setRef) { // own their children nodes.
- // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
- nextNode = currentNode->fParent;
- if (nextNode) {
- if (nextNode->fLeftChild == currentNode) {
- nextNode->fLeftChild = nullptr;
- } else if (nextNode->fRightChild == currentNode) {
- nextNode->fRightChild = nullptr;
- }
- }
- delete currentNode;
- } else if (currentNode->fLeftChild) {
- nextNode = currentNode->fLeftChild;
- if (nextNode->fParent == nullptr) {
- nextNode->fParent = currentNode;
- // fParent isn't always set; do it now if not.
- }
- U_ASSERT(nextNode->fParent == currentNode);
- } else if (currentNode->fRightChild) {
- nextNode = currentNode->fRightChild;
- if (nextNode->fParent == nullptr) {
- nextNode->fParent = currentNode;
- // fParent isn't always set; do it now if not.
- }
- U_ASSERT(nextNode->fParent == currentNode);
- }
- }
- }
- //-------------------------------------------------------------------------
- //
- // cloneTree Make a copy of the subtree rooted at this node.
- // Discard any variable references encountered along the way,
- // and replace with copies of the variable's definitions.
- // Used to replicate the expression underneath variable
- // references in preparation for generating the DFA tables.
- //
- //-------------------------------------------------------------------------
- RBBINode *RBBINode::cloneTree() {
- RBBINode *n;
- if (fType == RBBINode::varRef) {
- // If the current node is a variable reference, skip over it
- // and clone the definition of the variable instead.
- n = fLeftChild->cloneTree();
- } else if (fType == RBBINode::uset) {
- n = this;
- } else {
- n = new RBBINode(*this);
- // Check for null pointer.
- if (n != nullptr) {
- if (fLeftChild != nullptr) {
- n->fLeftChild = fLeftChild->cloneTree();
- n->fLeftChild->fParent = n;
- }
- if (fRightChild != nullptr) {
- n->fRightChild = fRightChild->cloneTree();
- n->fRightChild->fParent = n;
- }
- }
- }
- return n;
- }
- //-------------------------------------------------------------------------
- //
- // flattenVariables Walk a parse tree, replacing any variable
- // references with a copy of the variable's definition.
- // Aside from variables, the tree is not changed.
- //
- // Return the root of the tree. If the root was not a variable
- // reference, it remains unchanged - the root we started with
- // is the root we return. If, however, the root was a variable
- // reference, the root of the newly cloned replacement tree will
- // be returned, and the original tree deleted.
- //
- // This function works by recursively walking the tree
- // without doing anything until a variable reference is
- // found, then calling cloneTree() at that point. Any
- // nested references are handled by cloneTree(), not here.
- //
- //-------------------------------------------------------------------------
- constexpr int kRecursiveDepthLimit = 3500;
- RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
- if (U_FAILURE(status)) {
- return this;
- }
- // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
- // to avoid stack overflow crash.
- if (depth > kRecursiveDepthLimit) {
- status = U_INPUT_TOO_LONG_ERROR;
- return this;
- }
- if (fType == varRef) {
- RBBINode *retNode = fLeftChild->cloneTree();
- if (retNode != nullptr) {
- retNode->fRuleRoot = this->fRuleRoot;
- retNode->fChainIn = this->fChainIn;
- }
- delete this; // TODO: undefined behavior. Fix.
- return retNode;
- }
- if (fLeftChild != nullptr) {
- fLeftChild = fLeftChild->flattenVariables(status, depth+1);
- fLeftChild->fParent = this;
- }
- if (fRightChild != nullptr) {
- fRightChild = fRightChild->flattenVariables(status, depth+1);
- fRightChild->fParent = this;
- }
- return this;
- }
- //-------------------------------------------------------------------------
- //
- // flattenSets Walk the parse tree, replacing any nodes of type setRef
- // with a copy of the expression tree for the set. A set's
- // equivalent expression tree is precomputed and saved as
- // the left child of the uset node.
- //
- //-------------------------------------------------------------------------
- void RBBINode::flattenSets() {
- U_ASSERT(fType != setRef);
- if (fLeftChild != nullptr) {
- if (fLeftChild->fType==setRef) {
- RBBINode *setRefNode = fLeftChild;
- RBBINode *usetNode = setRefNode->fLeftChild;
- RBBINode *replTree = usetNode->fLeftChild;
- fLeftChild = replTree->cloneTree();
- fLeftChild->fParent = this;
- delete setRefNode;
- } else {
- fLeftChild->flattenSets();
- }
- }
- if (fRightChild != nullptr) {
- if (fRightChild->fType==setRef) {
- RBBINode *setRefNode = fRightChild;
- RBBINode *usetNode = setRefNode->fLeftChild;
- RBBINode *replTree = usetNode->fLeftChild;
- fRightChild = replTree->cloneTree();
- fRightChild->fParent = this;
- delete setRefNode;
- } else {
- fRightChild->flattenSets();
- }
- }
- }
- //-------------------------------------------------------------------------
- //
- // findNodes() Locate all the nodes of the specified type, starting
- // at the specified root.
- //
- //-------------------------------------------------------------------------
- void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
- /* test for buffer overflows */
- if (U_FAILURE(status)) {
- return;
- }
- U_ASSERT(!dest->hasDeleter());
- if (fType == kind) {
- dest->addElement(this, status);
- }
- if (fLeftChild != nullptr) {
- fLeftChild->findNodes(dest, kind, status);
- }
- if (fRightChild != nullptr) {
- fRightChild->findNodes(dest, kind, status);
- }
- }
- //-------------------------------------------------------------------------
- //
- // print. Print out a single node, for debugging.
- //
- //-------------------------------------------------------------------------
- #ifdef RBBI_DEBUG
- static int32_t serial(const RBBINode *node) {
- return (node == nullptr? -1 : node->fSerialNum);
- }
- void RBBINode::printNode(const RBBINode *node) {
- static const char * const nodeTypeNames[] = {
- "setRef",
- "uset",
- "varRef",
- "leafChar",
- "lookAhead",
- "tag",
- "endMark",
- "opStart",
- "opCat",
- "opOr",
- "opStar",
- "opPlus",
- "opQuestion",
- "opBreak",
- "opReverse",
- "opLParen"
- };
- if (node==nullptr) {
- RBBIDebugPrintf("%10p", (void *)node);
- } else {
- RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
- (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
- node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
- serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
- node->fFirstPos, node->fVal);
- if (node->fType == varRef) {
- RBBI_DEBUG_printUnicodeString(node->fText);
- }
- }
- RBBIDebugPrintf("\n");
- }
- #endif
- #ifdef RBBI_DEBUG
- U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
- RBBIDebugPrintf("%*s", minWidth, CStr(s)());
- }
- #endif
- //-------------------------------------------------------------------------
- //
- // print. Print out the tree of nodes rooted at "this"
- //
- //-------------------------------------------------------------------------
- #ifdef RBBI_DEBUG
- void RBBINode::printNodeHeader() {
- RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
- }
-
- void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
- if (printHeading) {
- printNodeHeader();
- }
- printNode(node);
- if (node != nullptr) {
- // Only dump the definition under a variable reference if asked to.
- // Unconditionally dump children of all other node types.
- if (node->fType != varRef) {
- if (node->fLeftChild != nullptr) {
- printTree(node->fLeftChild, false);
- }
-
- if (node->fRightChild != nullptr) {
- printTree(node->fRightChild, false);
- }
- }
- }
- }
- #endif
- U_NAMESPACE_END
- #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|