1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2012, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  bytestrieiterator.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov03
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/stringpiece.h"
20 #include "charstr.h"
21 #include "uvectr32.h"
22 
23 U_NAMESPACE_BEGIN
24 
Iterator(const void * trieBytes,int32_t maxStringLength,UErrorCode & errorCode)25 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26                               UErrorCode &errorCode)
27         : bytes_(static_cast<const uint8_t *>(trieBytes)),
28           pos_(bytes_), initialPos_(bytes_),
29           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
31     if(U_FAILURE(errorCode)) {
32         return;
33     }
34     // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35     // a public API header for which we would want it to depend only on
36     // other public headers.
37     // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38     // via the CharString and UVector32 implementations, so this additional
39     // cost is minimal.
40     str_=new CharString();
41     stack_=new UVector32(errorCode);
42     if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
43         errorCode=U_MEMORY_ALLOCATION_ERROR;
44     }
45 }
46 
Iterator(const BytesTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)47 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48                               UErrorCode &errorCode)
49         : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50           remainingMatchLength_(trie.remainingMatchLength_),
51           initialRemainingMatchLength_(trie.remainingMatchLength_),
52           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
53     if(U_FAILURE(errorCode)) {
54         return;
55     }
56     str_=new CharString();
57     stack_=new UVector32(errorCode);
58     if(U_FAILURE(errorCode)) {
59         return;
60     }
61     if(str_==NULL || stack_==NULL) {
62         errorCode=U_MEMORY_ALLOCATION_ERROR;
63         return;
64     }
65     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
66     if(length>=0) {
67         // Pending linear-match node, append remaining bytes to str_.
68         ++length;
69         if(maxLength_>0 && length>maxLength_) {
70             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
71         }
72         str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
73         pos_+=length;
74         remainingMatchLength_-=length;
75     }
76 }
77 
~Iterator()78 BytesTrie::Iterator::~Iterator() {
79     delete str_;
80     delete stack_;
81 }
82 
83 BytesTrie::Iterator &
reset()84 BytesTrie::Iterator::reset() {
85     pos_=initialPos_;
86     remainingMatchLength_=initialRemainingMatchLength_;
87     int32_t length=remainingMatchLength_+1;  // Remaining match length.
88     if(maxLength_>0 && length>maxLength_) {
89         length=maxLength_;
90     }
91     str_->truncate(length);
92     pos_+=length;
93     remainingMatchLength_-=length;
94     stack_->setSize(0);
95     return *this;
96 }
97 
98 UBool
hasNext() const99 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
100 
101 UBool
next(UErrorCode & errorCode)102 BytesTrie::Iterator::next(UErrorCode &errorCode) {
103     if(U_FAILURE(errorCode)) {
104         return FALSE;
105     }
106     const uint8_t *pos=pos_;
107     if(pos==NULL) {
108         if(stack_->isEmpty()) {
109             return FALSE;
110         }
111         // Pop the state off the stack and continue with the next outbound edge of
112         // the branch node.
113         int32_t stackSize=stack_->size();
114         int32_t length=stack_->elementAti(stackSize-1);
115         pos=bytes_+stack_->elementAti(stackSize-2);
116         stack_->setSize(stackSize-2);
117         str_->truncate(length&0xffff);
118         length=(int32_t)((uint32_t)length>>16);
119         if(length>1) {
120             pos=branchNext(pos, length, errorCode);
121             if(pos==NULL) {
122                 return TRUE;  // Reached a final value.
123             }
124         } else {
125             str_->append((char)*pos++, errorCode);
126         }
127     }
128     if(remainingMatchLength_>=0) {
129         // We only get here if we started in a pending linear-match node
130         // with more than maxLength remaining bytes.
131         return truncateAndStop();
132     }
133     for(;;) {
134         int32_t node=*pos++;
135         if(node>=kMinValueLead) {
136             // Deliver value for the byte sequence so far.
137             UBool isFinal=(UBool)(node&kValueIsFinal);
138             value_=readValue(pos, node>>1);
139             if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140                 pos_=NULL;
141             } else {
142                 pos_=skipValue(pos, node);
143             }
144             return TRUE;
145         }
146         if(maxLength_>0 && str_->length()==maxLength_) {
147             return truncateAndStop();
148         }
149         if(node<kMinLinearMatch) {
150             if(node==0) {
151                 node=*pos++;
152             }
153             pos=branchNext(pos, node+1, errorCode);
154             if(pos==NULL) {
155                 return TRUE;  // Reached a final value.
156             }
157         } else {
158             // Linear-match node, append length bytes to str_.
159             int32_t length=node-kMinLinearMatch+1;
160             if(maxLength_>0 && str_->length()+length>maxLength_) {
161                 str_->append(reinterpret_cast<const char *>(pos),
162                             maxLength_-str_->length(), errorCode);
163                 return truncateAndStop();
164             }
165             str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
166             pos+=length;
167         }
168     }
169 }
170 
171 StringPiece
getString() const172 BytesTrie::Iterator::getString() const {
173     return str_ == NULL ? StringPiece() : str_->toStringPiece();
174 }
175 
176 UBool
truncateAndStop()177 BytesTrie::Iterator::truncateAndStop() {
178     pos_=NULL;
179     value_=-1;  // no real value for str
180     return TRUE;
181 }
182 
183 // Branch node, needs to take the first outbound edge and push state for the rest.
184 const uint8_t *
branchNext(const uint8_t * pos,int32_t length,UErrorCode & errorCode)185 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186     while(length>kMaxBranchLinearSubNodeLength) {
187         ++pos;  // ignore the comparison byte
188         // Push state for the greater-or-equal edge.
189         stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190         stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191         // Follow the less-than edge.
192         length>>=1;
193         pos=jumpByDelta(pos);
194     }
195     // List of key-value pairs where values are either final values or jump deltas.
196     // Read the first (key, value) pair.
197     uint8_t trieByte=*pos++;
198     int32_t node=*pos++;
199     UBool isFinal=(UBool)(node&kValueIsFinal);
200     int32_t value=readValue(pos, node>>1);
201     pos=skipValue(pos, node);
202     stack_->addElement((int32_t)(pos-bytes_), errorCode);
203     stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204     str_->append((char)trieByte, errorCode);
205     if(isFinal) {
206         pos_=NULL;
207         value_=value;
208         return NULL;
209     } else {
210         return pos+value;
211     }
212 }
213 
214 U_NAMESPACE_END
215