1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2011, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  ucharstrieiterator.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov15
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 #include "unicode/ucharstrie.h"
19 #include "unicode/unistr.h"
20 #include "uvectr32.h"
21 
22 U_NAMESPACE_BEGIN
23 
Iterator(ConstChar16Ptr trieUChars,int32_t maxStringLength,UErrorCode & errorCode)24 UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
25                                UErrorCode &errorCode)
26         : uchars_(trieUChars),
27           pos_(uchars_), initialPos_(uchars_),
28           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
29           skipValue_(FALSE),
30           maxLength_(maxStringLength), value_(0), stack_(NULL) {
31     if(U_FAILURE(errorCode)) {
32         return;
33     }
34     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35     // a public API header for which we would want it to depend only on
36     // other public headers.
37     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38     // via the UnicodeString and UVector32 implementations, so this additional
39     // cost is minimal.
40     stack_=new UVector32(errorCode);
41     if(stack_==NULL) {
42         errorCode=U_MEMORY_ALLOCATION_ERROR;
43     }
44 }
45 
Iterator(const UCharsTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)46 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
47                                UErrorCode &errorCode)
48         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
49           remainingMatchLength_(trie.remainingMatchLength_),
50           initialRemainingMatchLength_(trie.remainingMatchLength_),
51           skipValue_(FALSE),
52           maxLength_(maxStringLength), value_(0), stack_(NULL) {
53     if(U_FAILURE(errorCode)) {
54         return;
55     }
56     stack_=new UVector32(errorCode);
57     if(U_FAILURE(errorCode)) {
58         return;
59     }
60     if(stack_==NULL) {
61         errorCode=U_MEMORY_ALLOCATION_ERROR;
62         return;
63     }
64     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
65     if(length>=0) {
66         // Pending linear-match node, append remaining UChars to str_.
67         ++length;
68         if(maxLength_>0 && length>maxLength_) {
69             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
70         }
71         str_.append(pos_, length);
72         pos_+=length;
73         remainingMatchLength_-=length;
74     }
75 }
76 
~Iterator()77 UCharsTrie::Iterator::~Iterator() {
78     delete stack_;
79 }
80 
81 UCharsTrie::Iterator &
reset()82 UCharsTrie::Iterator::reset() {
83     pos_=initialPos_;
84     remainingMatchLength_=initialRemainingMatchLength_;
85     skipValue_=FALSE;
86     int32_t length=remainingMatchLength_+1;  // Remaining match length.
87     if(maxLength_>0 && length>maxLength_) {
88         length=maxLength_;
89     }
90     str_.truncate(length);
91     pos_+=length;
92     remainingMatchLength_-=length;
93     stack_->setSize(0);
94     return *this;
95 }
96 
97 UBool
hasNext() const98 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
99 
100 UBool
next(UErrorCode & errorCode)101 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
102     if(U_FAILURE(errorCode)) {
103         return FALSE;
104     }
105     const UChar *pos=pos_;
106     if(pos==NULL) {
107         if(stack_->isEmpty()) {
108             return FALSE;
109         }
110         // Pop the state off the stack and continue with the next outbound edge of
111         // the branch node.
112         int32_t stackSize=stack_->size();
113         int32_t length=stack_->elementAti(stackSize-1);
114         pos=uchars_+stack_->elementAti(stackSize-2);
115         stack_->setSize(stackSize-2);
116         str_.truncate(length&0xffff);
117         length=(int32_t)((uint32_t)length>>16);
118         if(length>1) {
119             pos=branchNext(pos, length, errorCode);
120             if(pos==NULL) {
121                 return TRUE;  // Reached a final value.
122             }
123         } else {
124             str_.append(*pos++);
125         }
126     }
127     if(remainingMatchLength_>=0) {
128         // We only get here if we started in a pending linear-match node
129         // with more than maxLength remaining units.
130         return truncateAndStop();
131     }
132     for(;;) {
133         int32_t node=*pos++;
134         if(node>=kMinValueLead) {
135             if(skipValue_) {
136                 pos=skipNodeValue(pos, node);
137                 node&=kNodeTypeMask;
138                 skipValue_=FALSE;
139             } else {
140                 // Deliver value for the string so far.
141                 UBool isFinal=(UBool)(node>>15);
142                 if(isFinal) {
143                     value_=readValue(pos, node&0x7fff);
144                 } else {
145                     value_=readNodeValue(pos, node);
146                 }
147                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
148                     pos_=NULL;
149                 } else {
150                     // We cannot skip the value right here because it shares its
151                     // lead unit with a match node which we have to evaluate
152                     // next time.
153                     // Instead, keep pos_ on the node lead unit itself.
154                     pos_=pos-1;
155                     skipValue_=TRUE;
156                 }
157                 return TRUE;
158             }
159         }
160         if(maxLength_>0 && str_.length()==maxLength_) {
161             return truncateAndStop();
162         }
163         if(node<kMinLinearMatch) {
164             if(node==0) {
165                 node=*pos++;
166             }
167             pos=branchNext(pos, node+1, errorCode);
168             if(pos==NULL) {
169                 return TRUE;  // Reached a final value.
170             }
171         } else {
172             // Linear-match node, append length units to str_.
173             int32_t length=node-kMinLinearMatch+1;
174             if(maxLength_>0 && str_.length()+length>maxLength_) {
175                 str_.append(pos, maxLength_-str_.length());
176                 return truncateAndStop();
177             }
178             str_.append(pos, length);
179             pos+=length;
180         }
181     }
182 }
183 
184 // Branch node, needs to take the first outbound edge and push state for the rest.
185 const UChar *
branchNext(const UChar * pos,int32_t length,UErrorCode & errorCode)186 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
187     while(length>kMaxBranchLinearSubNodeLength) {
188         ++pos;  // ignore the comparison unit
189         // Push state for the greater-or-equal edge.
190         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
191         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
192         // Follow the less-than edge.
193         length>>=1;
194         pos=jumpByDelta(pos);
195     }
196     // List of key-value pairs where values are either final values or jump deltas.
197     // Read the first (key, value) pair.
198     UChar trieUnit=*pos++;
199     int32_t node=*pos++;
200     UBool isFinal=(UBool)(node>>15);
201     int32_t value=readValue(pos, node&=0x7fff);
202     pos=skipValue(pos, node);
203     stack_->addElement((int32_t)(pos-uchars_), errorCode);
204     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
205     str_.append(trieUnit);
206     if(isFinal) {
207         pos_=NULL;
208         value_=value;
209         return NULL;
210     } else {
211         return pos+value;
212     }
213 }
214 
215 U_NAMESPACE_END
216