1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * utf16collationiterator.h
9 *
10 * created on: 2010oct27
11 * created by: Markus W. Scherer
12 */
13 
14 #ifndef __UTF16COLLATIONITERATOR_H__
15 #define __UTF16COLLATIONITERATOR_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "cmemory.h"
22 #include "collation.h"
23 #include "collationdata.h"
24 #include "collationiterator.h"
25 #include "normalizer2impl.h"
26 
27 U_NAMESPACE_BEGIN
28 
29 /**
30  * UTF-16 collation element and character iterator.
31  * Handles normalized UTF-16 text inline, with length or NUL-terminated.
32  * Unnormalized text is handled by a subclass.
33  */
34 class U_I18N_API UTF16CollationIterator : public CollationIterator {
35 public:
UTF16CollationIterator(const CollationData * d,UBool numeric,const UChar * s,const UChar * p,const UChar * lim)36     UTF16CollationIterator(const CollationData *d, UBool numeric,
37                            const UChar *s, const UChar *p, const UChar *lim)
38             : CollationIterator(d, numeric),
39               start(s), pos(p), limit(lim) {}
40 
41     UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
42 
43     virtual ~UTF16CollationIterator();
44 
45     virtual UBool operator==(const CollationIterator &other) const;
46 
47     virtual void resetToOffset(int32_t newOffset);
48 
49     virtual int32_t getOffset() const;
50 
setText(const UChar * s,const UChar * lim)51     void setText(const UChar *s, const UChar *lim) {
52         reset();
53         start = pos = s;
54         limit = lim;
55     }
56 
57     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
58 
59     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
60 
61 protected:
62     // Copy constructor only for subclasses which set the pointers.
UTF16CollationIterator(const UTF16CollationIterator & other)63     UTF16CollationIterator(const UTF16CollationIterator &other)
64             : CollationIterator(other),
65               start(NULL), pos(NULL), limit(NULL) {}
66 
67     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
68 
69     virtual UChar handleGetTrailSurrogate();
70 
71     virtual UBool foundNULTerminator();
72 
73     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
74 
75     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
76 
77     // UTF-16 string pointers.
78     // limit can be NULL for NUL-terminated strings.
79     const UChar *start, *pos, *limit;
80 };
81 
82 /**
83  * Incrementally checks the input text for FCD and normalizes where necessary.
84  */
85 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
86 public:
FCDUTF16CollationIterator(const CollationData * data,UBool numeric,const UChar * s,const UChar * p,const UChar * lim)87     FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
88                               const UChar *s, const UChar *p, const UChar *lim)
89             : UTF16CollationIterator(data, numeric, s, p, lim),
90               rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
91               nfcImpl(data->nfcImpl),
92               checkDir(1) {}
93 
94     FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
95 
96     virtual ~FCDUTF16CollationIterator();
97 
98     virtual UBool operator==(const CollationIterator &other) const;
99 
100     virtual void resetToOffset(int32_t newOffset);
101 
102     virtual int32_t getOffset() const;
103 
104     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
105 
106     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
107 
108 protected:
109     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
110 
111     virtual UBool foundNULTerminator();
112 
113     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
114 
115     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
116 
117 private:
118     /**
119      * Switches to forward checking if possible.
120      * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
121      * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
122      */
123     void switchToForward();
124 
125     /**
126      * Extend the FCD text segment forward or normalize around pos.
127      * To be called when checkDir > 0 && pos != limit.
128      * @return TRUE if success, checkDir == 0 and pos != limit
129      */
130     UBool nextSegment(UErrorCode &errorCode);
131 
132     /**
133      * Switches to backward checking.
134      * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
135      * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
136      */
137     void switchToBackward();
138 
139     /**
140      * Extend the FCD text segment backward or normalize around pos.
141      * To be called when checkDir < 0 && pos != start.
142      * @return TRUE if success, checkDir == 0 and pos != start
143      */
144     UBool previousSegment(UErrorCode &errorCode);
145 
146     UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
147 
148     // Text pointers: The input text is [rawStart, rawLimit[
149     // where rawLimit can be NULL for NUL-terminated text.
150     //
151     // checkDir > 0:
152     //
153     // The input text [segmentStart..pos[ passes the FCD check.
154     // Moving forward checks incrementally.
155     // segmentLimit is undefined. limit == rawLimit.
156     //
157     // checkDir < 0:
158     // The input text [pos..segmentLimit[ passes the FCD check.
159     // Moving backward checks incrementally.
160     // segmentStart is undefined, start == rawStart.
161     //
162     // checkDir == 0:
163     //
164     // The input text [segmentStart..segmentLimit[ is being processed.
165     // These pointers are at FCD boundaries.
166     // Either this text segment already passes the FCD check
167     // and segmentStart==start<=pos<=limit==segmentLimit,
168     // or the current segment had to be normalized so that
169     // [segmentStart..segmentLimit[ turned into the normalized string,
170     // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
171     const UChar *rawStart;
172     const UChar *segmentStart;
173     const UChar *segmentLimit;
174     // rawLimit==NULL for a NUL-terminated string.
175     const UChar *rawLimit;
176 
177     const Normalizer2Impl &nfcImpl;
178     UnicodeString normalized;
179     // Direction of incremental FCD check. See comments before rawStart.
180     int8_t checkDir;
181 };
182 
183 U_NAMESPACE_END
184 
185 #endif  // !UCONFIG_NO_COLLATION
186 #endif  // __UTF16COLLATIONITERATOR_H__
187