1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
6 **********************************************************************
7 *   Date        Name        Description
8 *  03/22/2000   helena      Creation.
9 **********************************************************************
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/brkiter.h"
17 #include "unicode/schriter.h"
18 #include "unicode/search.h"
19 #include "usrchimp.h"
20 #include "cmemory.h"
21 
22 // public constructors and destructors -----------------------------------
23 U_NAMESPACE_BEGIN
24 
// Copy constructor.
// Copies the break iterator pointer and the target text of `other`, then
// builds a private USearch record that mirrors other's search state.
// The record is allocated with uprv_malloc and released in the destructor.
SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other),
      m_breakiterator_(other.m_breakiterator_),
      m_text_(other.m_text_)
{
    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    // The record comes from uprv_malloc, so every field that next() and
    // previous() read has to be given a value here; the direction and reset
    // flags are taken from the source object like the rest of the state.
    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
    m_search_->reset            = other.m_search_->reset;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    // Point at the buffer of our own copy of the text (as the other
    // constructors and setText() do) so the pointer cannot outlive the
    // string that backs it when `other` is destroyed or modified.
    m_search_->text             = m_text_.getBuffer();
    m_search_->textLength       = other.m_search_->textLength;
}
40 
~SearchIterator()41 SearchIterator::~SearchIterator()
42 {
43     if (m_search_ != NULL) {
44         uprv_free(m_search_);
45     }
46 }
47 
48 // public get and set methods ----------------------------------------
49 
setAttribute(USearchAttribute attribute,USearchAttributeValue value,UErrorCode & status)50 void SearchIterator::setAttribute(USearchAttribute       attribute,
51                                   USearchAttributeValue  value,
52                                   UErrorCode            &status)
53 {
54     if (U_SUCCESS(status)) {
55         switch (attribute)
56         {
57         case USEARCH_OVERLAP :
58             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
59             break;
60         case USEARCH_CANONICAL_MATCH :
61             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
62             break;
63         case USEARCH_ELEMENT_COMPARISON :
64             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
65                 m_search_->elementComparisonType = (int16_t)value;
66             } else {
67                 m_search_->elementComparisonType = 0;
68             }
69             break;
70         default:
71             status = U_ILLEGAL_ARGUMENT_ERROR;
72         }
73     }
74     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
75         status = U_ILLEGAL_ARGUMENT_ERROR;
76     }
77 }
78 
getAttribute(USearchAttribute attribute) const79 USearchAttributeValue SearchIterator::getAttribute(
80                                           USearchAttribute  attribute) const
81 {
82     switch (attribute) {
83     case USEARCH_OVERLAP :
84         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
85     case USEARCH_CANONICAL_MATCH :
86         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
87                                                                 USEARCH_OFF);
88     case USEARCH_ELEMENT_COMPARISON :
89         {
90             int16_t value = m_search_->elementComparisonType;
91             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
92                 return (USearchAttributeValue)value;
93             } else {
94                 return USEARCH_STANDARD_ELEMENT_COMPARISON;
95             }
96         }
97     default :
98         return USEARCH_DEFAULT;
99     }
100 }
101 
getMatchedStart() const102 int32_t SearchIterator::getMatchedStart() const
103 {
104     return m_search_->matchedIndex;
105 }
106 
getMatchedLength() const107 int32_t SearchIterator::getMatchedLength() const
108 {
109     return m_search_->matchedLength;
110 }
111 
getMatchedText(UnicodeString & result) const112 void SearchIterator::getMatchedText(UnicodeString &result) const
113 {
114     int32_t matchedindex  = m_search_->matchedIndex;
115     int32_t     matchedlength = m_search_->matchedLength;
116     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
117         result.setTo(m_search_->text + matchedindex, matchedlength);
118     }
119     else {
120         result.remove();
121     }
122 }
123 
setBreakIterator(BreakIterator * breakiter,UErrorCode & status)124 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
125                                       UErrorCode &status)
126 {
127     if (U_SUCCESS(status)) {
128 #if 0
129         m_search_->breakIter = NULL;
130         // the c++ breakiterator may not make use of ubreakiterator.
131         // so we'll have to keep track of it ourselves.
132 #else
133         // Well, gee... the Constructors that take a BreakIterator
134         // all cast the BreakIterator to a UBreakIterator and
135         // pass it to the corresponding usearch_openFromXXX
136         // routine, so there's no reason not to do this.
137         //
138         // Besides, a UBreakIterator is a BreakIterator, so
139         // any subclass of BreakIterator should work fine here...
140         m_search_->breakIter = (UBreakIterator *) breakiter;
141 #endif
142 
143         m_breakiterator_ = breakiter;
144     }
145 }
146 
getBreakIterator(void) const147 const BreakIterator * SearchIterator::getBreakIterator(void) const
148 {
149     return m_breakiterator_;
150 }
151 
setText(const UnicodeString & text,UErrorCode & status)152 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
153 {
154     if (U_SUCCESS(status)) {
155         if (text.length() == 0) {
156             status = U_ILLEGAL_ARGUMENT_ERROR;
157         }
158         else {
159             m_text_        = text;
160             m_search_->text = m_text_.getBuffer();
161             m_search_->textLength = m_text_.length();
162         }
163     }
164 }
165 
setText(CharacterIterator & text,UErrorCode & status)166 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
167 {
168     if (U_SUCCESS(status)) {
169         text.getText(m_text_);
170         setText(m_text_, status);
171     }
172 }
173 
getText(void) const174 const UnicodeString & SearchIterator::getText(void) const
175 {
176     return m_text_;
177 }
178 
179 // operator overloading ----------------------------------------------
180 
operator ==(const SearchIterator & that) const181 UBool SearchIterator::operator==(const SearchIterator &that) const
182 {
183     if (this == &that) {
184         return TRUE;
185     }
186     return (m_breakiterator_            == that.m_breakiterator_ &&
187             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
188             m_search_->isOverlap        == that.m_search_->isOverlap &&
189             m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
190             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
191             m_search_->matchedLength    == that.m_search_->matchedLength &&
192             m_search_->textLength       == that.m_search_->textLength &&
193             getOffset() == that.getOffset() &&
194             (uprv_memcmp(m_search_->text, that.m_search_->text,
195                               m_search_->textLength * sizeof(UChar)) == 0));
196 }
197 
198 // public methods ----------------------------------------------------
199 
first(UErrorCode & status)200 int32_t SearchIterator::first(UErrorCode &status)
201 {
202     if (U_FAILURE(status)) {
203         return USEARCH_DONE;
204     }
205     setOffset(0, status);
206     return handleNext(0, status);
207 }
208 
following(int32_t position,UErrorCode & status)209 int32_t SearchIterator::following(int32_t position,
210                                       UErrorCode &status)
211 {
212     if (U_FAILURE(status)) {
213         return USEARCH_DONE;
214     }
215     setOffset(position, status);
216     return handleNext(position, status);
217 }
218 
last(UErrorCode & status)219 int32_t SearchIterator::last(UErrorCode &status)
220 {
221     if (U_FAILURE(status)) {
222         return USEARCH_DONE;
223     }
224     setOffset(m_search_->textLength, status);
225     return handlePrev(m_search_->textLength, status);
226 }
227 
preceding(int32_t position,UErrorCode & status)228 int32_t SearchIterator::preceding(int32_t position,
229                                       UErrorCode &status)
230 {
231     if (U_FAILURE(status)) {
232         return USEARCH_DONE;
233     }
234     setOffset(position, status);
235     return handlePrev(position, status);
236 }
237 
next(UErrorCode & status)238 int32_t SearchIterator::next(UErrorCode &status)
239 {
240     if (U_SUCCESS(status)) {
241         int32_t offset = getOffset();
242         int32_t matchindex  = m_search_->matchedIndex;
243         int32_t     matchlength = m_search_->matchedLength;
244         m_search_->reset = FALSE;
245         if (m_search_->isForwardSearching == TRUE) {
246             int32_t textlength = m_search_->textLength;
247             if (offset == textlength || matchindex == textlength ||
248                 (matchindex != USEARCH_DONE &&
249                 matchindex + matchlength >= textlength)) {
250                 // not enough characters to match
251                 setMatchNotFound();
252                 return USEARCH_DONE;
253             }
254         }
255         else {
256             // switching direction.
257             // if matchedIndex == USEARCH_DONE, it means that either a
258             // setOffset has been called or that previous ran off the text
259             // string. the iterator would have been set to offset 0 if a
260             // match is not found.
261             m_search_->isForwardSearching = TRUE;
262             if (m_search_->matchedIndex != USEARCH_DONE) {
263                 // there's no need to set the collation element iterator
264                 // the next call to next will set the offset.
265                 return matchindex;
266             }
267         }
268 
269         if (matchlength > 0) {
270             // if matchlength is 0 we are at the start of the iteration
271             if (m_search_->isOverlap) {
272                 offset ++;
273             }
274             else {
275                 offset += matchlength;
276             }
277         }
278         return handleNext(offset, status);
279     }
280     return USEARCH_DONE;
281 }
282 
previous(UErrorCode & status)283 int32_t SearchIterator::previous(UErrorCode &status)
284 {
285     if (U_SUCCESS(status)) {
286         int32_t offset;
287         if (m_search_->reset) {
288             offset                       = m_search_->textLength;
289             m_search_->isForwardSearching = FALSE;
290             m_search_->reset              = FALSE;
291             setOffset(offset, status);
292         }
293         else {
294             offset = getOffset();
295         }
296 
297         int32_t matchindex = m_search_->matchedIndex;
298         if (m_search_->isForwardSearching == TRUE) {
299             // switching direction.
300             // if matchedIndex == USEARCH_DONE, it means that either a
301             // setOffset has been called or that next ran off the text
302             // string. the iterator would have been set to offset textLength if
303             // a match is not found.
304             m_search_->isForwardSearching = FALSE;
305             if (matchindex != USEARCH_DONE) {
306                 return matchindex;
307             }
308         }
309         else {
310             if (offset == 0 || matchindex == 0) {
311                 // not enough characters to match
312                 setMatchNotFound();
313                 return USEARCH_DONE;
314             }
315         }
316 
317         if (matchindex != USEARCH_DONE) {
318             if (m_search_->isOverlap) {
319                 matchindex += m_search_->matchedLength - 2;
320             }
321 
322             return handlePrev(matchindex, status);
323         }
324 
325         return handlePrev(offset, status);
326     }
327 
328     return USEARCH_DONE;
329 }
330 
reset()331 void SearchIterator::reset()
332 {
333     UErrorCode status = U_ZERO_ERROR;
334     setMatchNotFound();
335     setOffset(0, status);
336     m_search_->isOverlap          = FALSE;
337     m_search_->isCanonicalMatch   = FALSE;
338     m_search_->elementComparisonType = 0;
339     m_search_->isForwardSearching = TRUE;
340     m_search_->reset              = TRUE;
341 }
342 
343 // protected constructors and destructors -----------------------------
344 
SearchIterator()345 SearchIterator::SearchIterator()
346 {
347     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
348     m_search_->breakIter          = NULL;
349     m_search_->isOverlap          = FALSE;
350     m_search_->isCanonicalMatch   = FALSE;
351     m_search_->elementComparisonType = 0;
352     m_search_->isForwardSearching = TRUE;
353     m_search_->reset              = TRUE;
354     m_search_->matchedIndex       = USEARCH_DONE;
355     m_search_->matchedLength      = 0;
356     m_search_->text               = NULL;
357     m_search_->textLength         = 0;
358     m_breakiterator_              = NULL;
359 }
360 
SearchIterator(const UnicodeString & text,BreakIterator * breakiter)361 SearchIterator::SearchIterator(const UnicodeString &text,
362                                      BreakIterator *breakiter) :
363                                      m_breakiterator_(breakiter),
364                                      m_text_(text)
365 {
366     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
367     m_search_->breakIter          = NULL;
368     m_search_->isOverlap          = FALSE;
369     m_search_->isCanonicalMatch   = FALSE;
370     m_search_->elementComparisonType = 0;
371     m_search_->isForwardSearching = TRUE;
372     m_search_->reset              = TRUE;
373     m_search_->matchedIndex       = USEARCH_DONE;
374     m_search_->matchedLength      = 0;
375     m_search_->text               = m_text_.getBuffer();
376     m_search_->textLength         = text.length();
377 }
378 
SearchIterator(CharacterIterator & text,BreakIterator * breakiter)379 SearchIterator::SearchIterator(CharacterIterator &text,
380                                BreakIterator     *breakiter) :
381                                m_breakiterator_(breakiter)
382 {
383     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
384     m_search_->breakIter          = NULL;
385     m_search_->isOverlap          = FALSE;
386     m_search_->isCanonicalMatch   = FALSE;
387     m_search_->elementComparisonType = 0;
388     m_search_->isForwardSearching = TRUE;
389     m_search_->reset              = TRUE;
390     m_search_->matchedIndex       = USEARCH_DONE;
391     m_search_->matchedLength      = 0;
392     text.getText(m_text_);
393     m_search_->text               = m_text_.getBuffer();
394     m_search_->textLength         = m_text_.length();
395     m_breakiterator_             = breakiter;
396 }
397 
398 // protected methods ------------------------------------------------------
399 
// Assignment operator.
// Copies the break iterator pointer, the text, the attribute settings and
// the match position/length from `that`. The isForwardSearching and reset
// flags of this object are not copied. Self-assignment is a no-op.
SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_            = that.m_breakiterator_;
        m_text_                     = that.m_text_;
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        // Point at the buffer of our own copy of the text (as the
        // constructors and setText() do) so the pointer stays valid when
        // `that` is later destroyed or given a different text.
        m_search_->text             = m_text_.getBuffer();
        m_search_->textLength       = that.m_search_->textLength;
    }
    return *this;
}
416 
setMatchLength(int32_t length)417 void SearchIterator::setMatchLength(int32_t length)
418 {
419     m_search_->matchedLength = length;
420 }
421 
setMatchStart(int32_t position)422 void SearchIterator::setMatchStart(int32_t position)
423 {
424     m_search_->matchedIndex = position;
425 }
426 
setMatchNotFound()427 void SearchIterator::setMatchNotFound()
428 {
429     setMatchStart(USEARCH_DONE);
430     setMatchLength(0);
431     UErrorCode status = U_ZERO_ERROR;
432     // by default no errors should be returned here since offsets are within
433     // range.
434     if (m_search_->isForwardSearching) {
435         setOffset(m_search_->textLength, status);
436     }
437     else {
438         setOffset(0, status);
439     }
440 }
441 
442 
443 U_NAMESPACE_END
444 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
446