1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2002-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 #ifndef USETITER_H
10 #define USETITER_H
11 
12 #include "unicode/utypes.h"
13 
14 #if U_SHOW_CPLUSPLUS_API
15 
16 #include "unicode/uobject.h"
17 #include "unicode/unistr.h"
18 
19 /**
20  * \file
21  * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
22  */
23 
24 U_NAMESPACE_BEGIN
25 
26 class UnicodeSet;
27 class UnicodeString;
28 
29 /**
30  *
31  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
32  * iterates over either code points or code point ranges.  After all
33  * code points or ranges have been returned, it returns the
34  * multicharacter strings of the UnicodeSet, if any.
35  *
36  * This class is not intended to be subclassed.  Consider any fields
37  *  or methods declared as "protected" to be private.  The use of
38  *  protected in this class is an artifact of history.
39  *
40  * <p>To iterate over code points and strings, use a loop like this:
41  * <pre>
42  * UnicodeSetIterator it(set);
43  * while (it.next()) {
44  *     processItem(it.getString());
45  * }
46  * </pre>
47  * <p>Each item in the set is accessed as a string.  Set elements
48  *    consisting of single code points are returned as strings containing
49  *    just the one code point.
50  *
51  * <p>To iterate over code point ranges, instead of individual code points,
52  *    use a loop like this:
53  * <pre>
54  * UnicodeSetIterator it(set);
55  * while (it.nextRange()) {
56  *   if (it.isString()) {
57  *     processString(it.getString());
58  *   } else {
59  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
60  *   }
61  * }
62  * </pre>
63  * @author M. Davis
64  * @stable ICU 2.4
65  */
66 class U_COMMON_API UnicodeSetIterator : public UObject {
67 
68  protected:
69 
70     /**
71      * Value of <tt>codepoint</tt> if the iterator points to a string.
72      * If <tt>codepoint == IS_STRING</tt>, then examine
73      * <tt>string</tt> for the current iteration result.
74      * @stable ICU 2.4
75      */
76     enum { IS_STRING = -1 };
77 
78     /**
79      * Current code point, or the special value <tt>IS_STRING</tt>, if
80      * the iterator points to a string.
81      * @stable ICU 2.4
82      */
83     UChar32 codepoint;
84 
85     /**
86      * When iterating over ranges using <tt>nextRange()</tt>,
87      * <tt>codepointEnd</tt> contains the inclusive end of the
88      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
89      * iterating over code points using <tt>next()</tt>, or if
90      * <tt>codepoint == IS_STRING</tt>, then the value of
91      * <tt>codepointEnd</tt> is undefined.
92      * @stable ICU 2.4
93      */
94     UChar32 codepointEnd;
95 
96     /**
97      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
98      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
99      * value of <tt>string</tt> is undefined.
100      * @stable ICU 2.4
101      */
102     const UnicodeString* string;
103 
104  public:
105 
106     /**
107      * Create an iterator over the given set.  The iterator is valid
108      * only so long as <tt>set</tt> is valid.
109      * @param set set to iterate over
110      * @stable ICU 2.4
111      */
112     UnicodeSetIterator(const UnicodeSet& set);
113 
114     /**
115      * Create an iterator over nothing.  <tt>next()</tt> and
116      * <tt>nextRange()</tt> return false. This is a convenience
117      * constructor allowing the target to be set later.
118      * @stable ICU 2.4
119      */
120     UnicodeSetIterator();
121 
122     /**
123      * Destructor.
124      * @stable ICU 2.4
125      */
126     virtual ~UnicodeSetIterator();
127 
128     /**
129      * Returns true if the current element is a string.  If so, the
130      * caller can retrieve it with <tt>getString()</tt>.  If this
131      * method returns false, the current element is a code point or
132      * code point range, depending on whether <tt>next()</tt> or
133      * <tt>nextRange()</tt> was called.
134      * Elements of types string and codepoint can both be retrieved
135      * with the function <tt>getString()</tt>.
136      * Elements of type codepoint can also be retrieved with
137      * <tt>getCodepoint()</tt>.
138      * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
139      * of the range, and <tt>getCodepointEnd()</tt> returns the end
140      * of the range.
141      * @stable ICU 2.4
142      */
143     inline UBool isString() const;
144 
145     /**
146      * Returns the current code point, if <tt>isString()</tt> returned
147      * false.  Otherwise returns an undefined result.
148      * @stable ICU 2.4
149      */
150     inline UChar32 getCodepoint() const;
151 
152     /**
153      * Returns the end of the current code point range, if
154      * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
155      * called.  Otherwise returns an undefined result.
156      * @stable ICU 2.4
157      */
158     inline UChar32 getCodepointEnd() const;
159 
160     /**
161      * Returns the current string, if <tt>isString()</tt> returned
162      * true.  If the current iteration item is a code point, a UnicodeString
163      * containing that single code point is returned.
164      *
165      * Ownership of the returned string remains with the iterator.
166      * The string is guaranteed to remain valid only until the iterator is
167      *   advanced to the next item, or until the iterator is deleted.
168      *
169      * @stable ICU 2.4
170      */
171     const UnicodeString& getString();
172 
173     /**
174      * Advances the iteration position to the next element in the set,
175      * which can be either a single code point or a string.
176      * If there are no more elements in the set, return false.
177      *
178      * <p>
179      * If <tt>isString() == TRUE</tt>, the value is a
180      * string, otherwise the value is a
181      * single code point.  Elements of either type can be retrieved
182      * with the function <tt>getString()</tt>, while elements of
183      * consisting of a single code point can be retrieved with
184      * <tt>getCodepoint()</tt>
185      *
186      * <p>The order of iteration is all code points in sorted order,
187      * followed by all strings sorted order.    Do not mix
188      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
189      * calling <tt>reset()</tt> between them.  The results of doing so
190      * are undefined.
191      *
192      * @return true if there was another element in the set.
193      * @stable ICU 2.4
194      */
195     UBool next();
196 
197     /**
198      * Returns the next element in the set, either a code point range
199      * or a string.  If there are no more elements in the set, return
200      * false.  If <tt>isString() == TRUE</tt>, the value is a
201      * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
202      * range of one or more code points from <tt>getCodepoint()</tt> to
203      * <tt>getCodepointeEnd()</tt> inclusive.
204      *
205      * <p>The order of iteration is all code points ranges in sorted
206      * order, followed by all strings sorted order.  Ranges are
207      * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
208      * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
209      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
210      * <tt>reset()</tt> between them.  The results of doing so are
211      * undefined.
212      *
213      * @return true if there was another element in the set.
214      * @stable ICU 2.4
215      */
216     UBool nextRange();
217 
218     /**
219      * Sets this iterator to visit the elements of the given set and
220      * resets it to the start of that set.  The iterator is valid only
221      * so long as <tt>set</tt> is valid.
222      * @param set the set to iterate over.
223      * @stable ICU 2.4
224      */
225     void reset(const UnicodeSet& set);
226 
227     /**
228      * Resets this iterator to the start of the set.
229      * @stable ICU 2.4
230      */
231     void reset();
232 
233     /**
234      * ICU "poor man's RTTI", returns a UClassID for this class.
235      *
236      * @stable ICU 2.4
237      */
238     static UClassID U_EXPORT2 getStaticClassID();
239 
240     /**
241      * ICU "poor man's RTTI", returns a UClassID for the actual class.
242      *
243      * @stable ICU 2.4
244      */
245     virtual UClassID getDynamicClassID() const;
246 
247     // ======================= PRIVATES ===========================
248 
249  protected:
250 
251     // endElement and nextElements are really UChar32's, but we keep
252     // them as signed int32_t's so we can do comparisons with
253     // endElement set to -1.  Leave them as int32_t's.
254     /** The set
255      * @stable ICU 2.4
256      */
257     const UnicodeSet* set;
258     /** End range
259      * @stable ICU 2.4
260      */
261     int32_t endRange;
262     /** Range
263      * @stable ICU 2.4
264      */
265     int32_t range;
266     /** End element
267      * @stable ICU 2.4
268      */
269     int32_t endElement;
270     /** Next element
271      * @stable ICU 2.4
272      */
273     int32_t nextElement;
274     //UBool abbreviated;
275     /** Next string
276      * @stable ICU 2.4
277      */
278     int32_t nextString;
279     /** String count
280      * @stable ICU 2.4
281      */
282     int32_t stringCount;
283 
284     /**
285      *  Points to the string to use when the caller asks for a
286      *  string and the current iteration item is a code point, not a string.
287      *  @internal
288      */
289     UnicodeString *cpString;
290 
291     /** Copy constructor. Disallowed.
292      * @stable ICU 2.4
293      */
294     UnicodeSetIterator(const UnicodeSetIterator&); // disallow
295 
296     /** Assignment operator. Disallowed.
297      * @stable ICU 2.4
298      */
299     UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
300 
301     /** Load range
302      * @stable ICU 2.4
303      */
304     virtual void loadRange(int32_t range);
305 
306 };
307 
isString()308 inline UBool UnicodeSetIterator::isString() const {
309     return codepoint == (UChar32)IS_STRING;
310 }
311 
getCodepoint()312 inline UChar32 UnicodeSetIterator::getCodepoint() const {
313     return codepoint;
314 }
315 
getCodepointEnd()316 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
317     return codepointEnd;
318 }
319 
320 
321 U_NAMESPACE_END
322 
323 #endif /* U_SHOW_CPLUSPLUS_API */
324 
325 #endif
326