1 /*
2 ***************************************************************************
3 *   Copyright (C) 2002-2016 International Business Machines Corporation   *
4 *   and others. All rights reserved.                                      *
5 ***************************************************************************
6 */
7 
8 //
9 //  File:  rbbinode.cpp
10 //
11 //         Implementation of class RBBINode, which represents a node in the
12 //         tree generated when parsing the Rules Based Break Iterator rules.
13 //
14 //         This "Class" is actually closer to a struct.
15 //         Code using it is expected to directly access fields much of the time.
16 //
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_BREAK_ITERATION
21 
22 #include "unicode/unistr.h"
23 #include "unicode/uniset.h"
24 #include "unicode/uchar.h"
25 #include "unicode/parsepos.h"
26 #include "uvector.h"
27 
28 #include "rbbirb.h"
29 #include "rbbinode.h"
30 
31 #include "uassert.h"
32 
33 
34 U_NAMESPACE_BEGIN
35 
36 #ifdef RBBI_DEBUG
37 static int  gLastSerial = 0;
38 #endif
39 
40 
41 //-------------------------------------------------------------------------
42 //
43 //    Constructor.   Just set the fields to reasonable default values.
44 //
45 //-------------------------------------------------------------------------
RBBINode(NodeType t)46 RBBINode::RBBINode(NodeType t) : UMemory() {
47 #ifdef RBBI_DEBUG
48     fSerialNum    = ++gLastSerial;
49 #endif
50     fType         = t;
51     fParent       = NULL;
52     fLeftChild    = NULL;
53     fRightChild   = NULL;
54     fInputSet     = NULL;
55     fFirstPos     = 0;
56     fLastPos      = 0;
57     fNullable     = FALSE;
58     fLookAheadEnd = FALSE;
59     fRuleRoot     = FALSE;
60     fChainIn      = FALSE;
61     fVal          = 0;
62     fPrecedence   = precZero;
63 
64     UErrorCode     status = U_ZERO_ERROR;
65     fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
66     fLastPosSet   = new UVector(status);
67     fFollowPos    = new UVector(status);
68     if      (t==opCat)    {fPrecedence = precOpCat;}
69     else if (t==opOr)     {fPrecedence = precOpOr;}
70     else if (t==opStart)  {fPrecedence = precStart;}
71     else if (t==opLParen) {fPrecedence = precLParen;}
72 
73 }
74 
75 
RBBINode(const RBBINode & other)76 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
77 #ifdef RBBI_DEBUG
78     fSerialNum   = ++gLastSerial;
79 #endif
80     fType        = other.fType;
81     fParent      = NULL;
82     fLeftChild   = NULL;
83     fRightChild  = NULL;
84     fInputSet    = other.fInputSet;
85     fPrecedence  = other.fPrecedence;
86     fText        = other.fText;
87     fFirstPos    = other.fFirstPos;
88     fLastPos     = other.fLastPos;
89     fNullable    = other.fNullable;
90     fVal         = other.fVal;
91     fRuleRoot    = FALSE;
92     fChainIn     = other.fChainIn;
93     UErrorCode     status = U_ZERO_ERROR;
94     fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
95     fLastPosSet  = new UVector(status);
96     fFollowPos   = new UVector(status);
97 }
98 
99 
100 //-------------------------------------------------------------------------
101 //
102 //    Destructor.   Deletes both this node AND any child nodes,
103 //                  except in the case of variable reference nodes.  For
104 //                  these, the l. child points back to the definition, which
105 //                  is common for all references to the variable, meaning
106 //                  it can't be deleted here.
107 //
108 //-------------------------------------------------------------------------
~RBBINode()109 RBBINode::~RBBINode() {
110     // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
111     delete fInputSet;
112     fInputSet = NULL;
113 
114     switch (this->fType) {
115     case varRef:
116     case setRef:
117         // for these node types, multiple instances point to the same "children"
118         // Storage ownership of children handled elsewhere.  Don't delete here.
119         break;
120 
121     default:
122         delete        fLeftChild;
123         fLeftChild =   NULL;
124         delete        fRightChild;
125         fRightChild = NULL;
126     }
127 
128 
129     delete fFirstPosSet;
130     delete fLastPosSet;
131     delete fFollowPos;
132 
133 }
134 
135 
136 //-------------------------------------------------------------------------
137 //
138 //    cloneTree     Make a copy of the subtree rooted at this node.
139 //                  Discard any variable references encountered along the way,
140 //                  and replace with copies of the variable's definitions.
141 //                  Used to replicate the expression underneath variable
142 //                  references in preparation for generating the DFA tables.
143 //
144 //-------------------------------------------------------------------------
cloneTree()145 RBBINode *RBBINode::cloneTree() {
146     RBBINode    *n;
147 
148     if (fType == RBBINode::varRef) {
149         // If the current node is a variable reference, skip over it
150         //   and clone the definition of the variable instead.
151         n = fLeftChild->cloneTree();
152     } else if (fType == RBBINode::uset) {
153         n = this;
154     } else {
155         n = new RBBINode(*this);
156         // Check for null pointer.
157         if (n != NULL) {
158             if (fLeftChild != NULL) {
159                 n->fLeftChild          = fLeftChild->cloneTree();
160                 n->fLeftChild->fParent = n;
161             }
162             if (fRightChild != NULL) {
163                 n->fRightChild          = fRightChild->cloneTree();
164                 n->fRightChild->fParent = n;
165             }
166         }
167     }
168     n->fRuleRoot = this->fRuleRoot;
169     n->fChainIn  = this->fChainIn;
170     return n;
171 }
172 
173 
174 
175 //-------------------------------------------------------------------------
176 //
177 //   flattenVariables   Walk a parse tree, replacing any variable
178 //                      references with a copy of the variable's definition.
179 //                      Aside from variables, the tree is not changed.
180 //
181 //                      Return the root of the tree.  If the root was not a variable
182 //                      reference, it remains unchanged - the root we started with
183 //                      is the root we return.  If, however, the root was a variable
184 //                      reference, the root of the newly cloned replacement tree will
185 //                      be returned, and the original tree deleted.
186 //
187 //                      This function works by recursively walking the tree
188 //                      without doing anything until a variable reference is
189 //                      found, then calling cloneTree() at that point.  Any
190 //                      nested references are handled by cloneTree(), not here.
191 //
192 //-------------------------------------------------------------------------
flattenVariables()193 RBBINode *RBBINode::flattenVariables() {
194     if (fType == varRef) {
195         RBBINode *retNode = fLeftChild->cloneTree();
196         delete this;
197         return retNode;
198     }
199 
200     if (fLeftChild != NULL) {
201         fLeftChild = fLeftChild->flattenVariables();
202         fLeftChild->fParent  = this;
203     }
204     if (fRightChild != NULL) {
205         fRightChild = fRightChild->flattenVariables();
206         fRightChild->fParent = this;
207     }
208     return this;
209 }
210 
211 
212 //-------------------------------------------------------------------------
213 //
214 //  flattenSets    Walk the parse tree, replacing any nodes of type setRef
215 //                 with a copy of the expression tree for the set.  A set's
216 //                 equivalent expression tree is precomputed and saved as
217 //                 the left child of the uset node.
218 //
219 //-------------------------------------------------------------------------
flattenSets()220 void RBBINode::flattenSets() {
221     U_ASSERT(fType != setRef);
222 
223     if (fLeftChild != NULL) {
224         if (fLeftChild->fType==setRef) {
225             RBBINode *setRefNode = fLeftChild;
226             RBBINode *usetNode   = setRefNode->fLeftChild;
227             RBBINode *replTree   = usetNode->fLeftChild;
228             fLeftChild           = replTree->cloneTree();
229             fLeftChild->fParent  = this;
230             delete setRefNode;
231         } else {
232             fLeftChild->flattenSets();
233         }
234     }
235 
236     if (fRightChild != NULL) {
237         if (fRightChild->fType==setRef) {
238             RBBINode *setRefNode = fRightChild;
239             RBBINode *usetNode   = setRefNode->fLeftChild;
240             RBBINode *replTree   = usetNode->fLeftChild;
241             fRightChild           = replTree->cloneTree();
242             fRightChild->fParent  = this;
243             delete setRefNode;
244         } else {
245             fRightChild->flattenSets();
246         }
247     }
248 }
249 
250 
251 
252 //-------------------------------------------------------------------------
253 //
254 //   findNodes()     Locate all the nodes of the specified type, starting
255 //                   at the specified root.
256 //
257 //-------------------------------------------------------------------------
findNodes(UVector * dest,RBBINode::NodeType kind,UErrorCode & status)258 void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
259     /* test for buffer overflows */
260     if (U_FAILURE(status)) {
261         return;
262     }
263     if (fType == kind) {
264         dest->addElement(this, status);
265     }
266     if (fLeftChild != NULL) {
267         fLeftChild->findNodes(dest, kind, status);
268     }
269     if (fRightChild != NULL) {
270         fRightChild->findNodes(dest, kind, status);
271     }
272 }
273 
274 
275 //-------------------------------------------------------------------------
276 //
277 //    print.         Print out a single node, for debugging.
278 //
279 //-------------------------------------------------------------------------
280 #ifdef RBBI_DEBUG
281 
serial(const RBBINode * node)282 static int32_t serial(const RBBINode *node) {
283     return (node == NULL? -1 : node->fSerialNum);
284 }
285 
286 
printNode()287 void RBBINode::printNode() {
288     static const char * const nodeTypeNames[] = {
289                 "setRef",
290                 "uset",
291                 "varRef",
292                 "leafChar",
293                 "lookAhead",
294                 "tag",
295                 "endMark",
296                 "opStart",
297                 "opCat",
298                 "opOr",
299                 "opStar",
300                 "opPlus",
301                 "opQuestion",
302                 "opBreak",
303                 "opReverse",
304                 "opLParen"
305     };
306 
307     if (this==NULL) {
308         RBBIDebugPrintf("%10p", (void *)this);
309     } else {
310         RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
311             (void *)this, fSerialNum, nodeTypeNames[fType],   fRuleRoot?'R':' ',  fChainIn?'C':' ',
312              serial(fLeftChild), serial(fRightChild), serial(fParent),
313             fFirstPos, fVal);
314         if (fType == varRef) {
315             RBBI_DEBUG_printUnicodeString(fText);
316         }
317     }
318     RBBIDebugPrintf("\n");
319 }
320 #endif
321 
322 
323 #ifdef RBBI_DEBUG
RBBI_DEBUG_printUnicodeString(const UnicodeString & s,int minWidth)324 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
325 {
326     int i;
327     for (i=0; i<s.length(); i++) {
328         RBBIDebugPrintf("%c", s.charAt(i));
329         // putc(s.charAt(i), stdout);
330     }
331     for (i=s.length(); i<minWidth; i++) {
332         RBBIDebugPrintf(" ");
333     }
334 }
335 #endif
336 
337 
338 //-------------------------------------------------------------------------
339 //
340 //    print.         Print out the tree of nodes rooted at "this"
341 //
342 //-------------------------------------------------------------------------
343 #ifdef RBBI_DEBUG
printNodeHeader()344 void RBBINode::printNodeHeader() {
345     RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
346 }
347 
printTree(UBool printHeading)348 void RBBINode::printTree(UBool printHeading) {
349     if (printHeading) {
350         printNodeHeader();
351     }
352     this->printNode();
353     if (this != NULL) {
354         // Only dump the definition under a variable reference if asked to.
355         // Unconditinally dump children of all other node types.
356         if (fType != varRef) {
357             if (fLeftChild != NULL) {
358                 fLeftChild->printTree(FALSE);
359             }
360 
361             if (fRightChild != NULL) {
362                 fRightChild->printTree(FALSE);
363             }
364         }
365     }
366 }
367 #endif
368 
369 
370 
371 U_NAMESPACE_END
372 
373 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
374