1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2003-2011, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: September 24 2003
10 * Since: ICU 2.8
11 **********************************************************************
12 */
13 #include "ruleiter.h"
14 #include "unicode/parsepos.h"
15 #include "unicode/symtable.h"
16 #include "unicode/unistr.h"
17 #include "unicode/utf16.h"
18 #include "patternprops.h"
19 
20 /* \U87654321 or \ud800\udc00 */
21 #define MAX_U_NOTATION_LEN 12
22 
23 U_NAMESPACE_BEGIN
24 
RuleCharacterIterator(const UnicodeString & theText,const SymbolTable * theSym,ParsePosition & thePos)25 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
26                       ParsePosition& thePos) :
27     text(theText),
28     pos(thePos),
29     sym(theSym),
30     buf(0),
31     bufPos(0)
32 {}
33 
atEnd() const34 UBool RuleCharacterIterator::atEnd() const {
35     return buf == 0 && pos.getIndex() == text.length();
36 }
37 
next(int32_t options,UBool & isEscaped,UErrorCode & ec)38 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
39     if (U_FAILURE(ec)) return DONE;
40 
41     UChar32 c = DONE;
42     isEscaped = FALSE;
43 
44     for (;;) {
45         c = _current();
46         _advance(U16_LENGTH(c));
47 
48         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
49             (options & PARSE_VARIABLES) != 0 && sym != 0) {
50             UnicodeString name = sym->parseReference(text, pos, text.length());
51             // If name is empty there was an isolated SYMBOL_REF;
52             // return it.  Caller must be prepared for this.
53             if (name.length() == 0) {
54                 break;
55             }
56             bufPos = 0;
57             buf = sym->lookup(name);
58             if (buf == 0) {
59                 ec = U_UNDEFINED_VARIABLE;
60                 return DONE;
61             }
62             // Handle empty variable value
63             if (buf->length() == 0) {
64                 buf = 0;
65             }
66             continue;
67         }
68 
69         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
70             continue;
71         }
72 
73         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
74             UnicodeString tempEscape;
75             int32_t offset = 0;
76             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
77             jumpahead(offset);
78             isEscaped = TRUE;
79             if (c < 0) {
80                 ec = U_MALFORMED_UNICODE_ESCAPE;
81                 return DONE;
82             }
83         }
84 
85         break;
86     }
87 
88     return c;
89 }
90 
getPos(RuleCharacterIterator::Pos & p) const91 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
92     p.buf = buf;
93     p.pos = pos.getIndex();
94     p.bufPos = bufPos;
95 }
96 
setPos(const RuleCharacterIterator::Pos & p)97 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
98     buf = p.buf;
99     pos.setIndex(p.pos);
100     bufPos = p.bufPos;
101 }
102 
skipIgnored(int32_t options)103 void RuleCharacterIterator::skipIgnored(int32_t options) {
104     if ((options & SKIP_WHITESPACE) != 0) {
105         for (;;) {
106             UChar32 a = _current();
107             if (!PatternProps::isWhiteSpace(a)) break;
108             _advance(U16_LENGTH(a));
109         }
110     }
111 }
112 
lookahead(UnicodeString & result,int32_t maxLookAhead) const113 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
114     if (maxLookAhead < 0) {
115         maxLookAhead = 0x7FFFFFFF;
116     }
117     if (buf != 0) {
118         buf->extract(bufPos, maxLookAhead, result);
119     } else {
120         text.extract(pos.getIndex(), maxLookAhead, result);
121     }
122     return result;
123 }
124 
jumpahead(int32_t count)125 void RuleCharacterIterator::jumpahead(int32_t count) {
126     _advance(count);
127 }
128 
129 /*
130 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
131     int32_t b = pos.getIndex();
132     text.extract(0, b, result);
133     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
134 }
135 */
136 
_current() const137 UChar32 RuleCharacterIterator::_current() const {
138     if (buf != 0) {
139         return buf->char32At(bufPos);
140     } else {
141         int i = pos.getIndex();
142         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
143     }
144 }
145 
_advance(int32_t count)146 void RuleCharacterIterator::_advance(int32_t count) {
147     if (buf != 0) {
148         bufPos += count;
149         if (bufPos == buf->length()) {
150             buf = 0;
151         }
152     } else {
153         pos.setIndex(pos.getIndex() + count);
154         if (pos.getIndex() > text.length()) {
155             pos.setIndex(text.length());
156         }
157     }
158 }
159 
160 U_NAMESPACE_END
161 
162 //eof
163