1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 *   Copyright (C) 1996-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 ********************************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/ubrk.h"
15 
16 #include "unicode/brkiter.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ustring.h"
19 #include "unicode/uchriter.h"
20 #include "unicode/rbbi.h"
21 #include "rbbirb.h"
22 #include "uassert.h"
23 #include "cmemory.h"
24 
25 U_NAMESPACE_USE
26 
27 //------------------------------------------------------------------------------
28 //
29 //    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
30 //                   and locale.
31 //
32 //------------------------------------------------------------------------------
33 U_CAPI UBreakIterator* U_EXPORT2
ubrk_open(UBreakIteratorType type,const char * locale,const UChar * text,int32_t textLength,UErrorCode * status)34 ubrk_open(UBreakIteratorType type,
35       const char *locale,
36       const UChar *text,
37       int32_t textLength,
38       UErrorCode *status)
39 {
40 
41   if(U_FAILURE(*status)) return 0;
42 
43   BreakIterator *result = 0;
44 
45   switch(type) {
46 
47   case UBRK_CHARACTER:
48     result = BreakIterator::createCharacterInstance(Locale(locale), *status);
49     break;
50 
51   case UBRK_WORD:
52     result = BreakIterator::createWordInstance(Locale(locale), *status);
53     break;
54 
55   case UBRK_LINE:
56     result = BreakIterator::createLineInstance(Locale(locale), *status);
57     break;
58 
59   case UBRK_SENTENCE:
60     result = BreakIterator::createSentenceInstance(Locale(locale), *status);
61     break;
62 
63   case UBRK_TITLE:
64     result = BreakIterator::createTitleInstance(Locale(locale), *status);
65     break;
66 
67   default:
68     *status = U_ILLEGAL_ARGUMENT_ERROR;
69   }
70 
71   // check for allocation error
72   if (U_FAILURE(*status)) {
73      return 0;
74   }
75   if(result == 0) {
76     *status = U_MEMORY_ALLOCATION_ERROR;
77     return 0;
78   }
79 
80 
81   UBreakIterator *uBI = (UBreakIterator *)result;
82   if (text != NULL) {
83       ubrk_setText(uBI, text, textLength, status);
84   }
85   return uBI;
86 }
87 
88 
89 
90 //------------------------------------------------------------------------------
91 //
92 //   ubrk_openRules      open a break iterator from a set of break rules.
93 //                       Invokes the rule builder.
94 //
95 //------------------------------------------------------------------------------
96 U_CAPI UBreakIterator* U_EXPORT2
ubrk_openRules(const UChar * rules,int32_t rulesLength,const UChar * text,int32_t textLength,UParseError * parseErr,UErrorCode * status)97 ubrk_openRules(  const UChar        *rules,
98                        int32_t       rulesLength,
99                  const UChar        *text,
100                        int32_t       textLength,
101                        UParseError  *parseErr,
102                        UErrorCode   *status)  {
103 
104     if (status == NULL || U_FAILURE(*status)){
105         return 0;
106     }
107 
108     BreakIterator *result = 0;
109     UnicodeString ruleString(rules, rulesLength);
110     result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
111     if(U_FAILURE(*status)) {
112         return 0;
113     }
114 
115     UBreakIterator *uBI = (UBreakIterator *)result;
116     if (text != NULL) {
117         ubrk_setText(uBI, text, textLength, status);
118     }
119     return uBI;
120 }
121 
122 
123 U_CAPI UBreakIterator* U_EXPORT2
ubrk_openBinaryRules(const uint8_t * binaryRules,int32_t rulesLength,const UChar * text,int32_t textLength,UErrorCode * status)124 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
125                      const UChar *  text, int32_t textLength,
126                      UErrorCode *   status)
127 {
128     if (U_FAILURE(*status)) {
129         return NULL;
130     }
131     if (rulesLength < 0) {
132         *status = U_ILLEGAL_ARGUMENT_ERROR;
133         return NULL;
134     }
135     LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
136     if (U_FAILURE(*status)) {
137         return NULL;
138     }
139     UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
140     if (text != NULL) {
141         ubrk_setText(uBI, text, textLength, status);
142     }
143     return uBI;
144 }
145 
146 
147 U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(const UBreakIterator * bi,void *,int32_t * pBufferSize,UErrorCode * status)148 ubrk_safeClone(
149           const UBreakIterator *bi,
150           void * /*stackBuffer*/,
151           int32_t *pBufferSize,
152           UErrorCode *status)
153 {
154     if (status == NULL || U_FAILURE(*status)){
155         return NULL;
156     }
157     if (bi == NULL) {
158        *status = U_ILLEGAL_ARGUMENT_ERROR;
159         return NULL;
160     }
161     if (pBufferSize != NULL) {
162         int32_t inputSize = *pBufferSize;
163         *pBufferSize = 1;
164         if (inputSize == 0) {
165             return NULL;  // preflighting for deprecated functionality
166         }
167     }
168     BreakIterator *newBI = ((BreakIterator *)bi)->clone();
169     if (newBI == NULL) {
170         *status = U_MEMORY_ALLOCATION_ERROR;
171     } else {
172         *status = U_SAFECLONE_ALLOCATED_WARNING;
173     }
174     return (UBreakIterator *)newBI;
175 }
176 
177 U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator * bi,UErrorCode * status)178 ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
179     if (U_FAILURE(*status)) {
180         return nullptr;
181     }
182     BreakIterator *newBI = ((BreakIterator *)bi)->clone();
183     if (newBI == nullptr) {
184         *status = U_MEMORY_ALLOCATION_ERROR;
185         return nullptr;
186     }
187     return (UBreakIterator *)newBI;
188 }
189 
190 
191 U_CAPI void U_EXPORT2
ubrk_close(UBreakIterator * bi)192 ubrk_close(UBreakIterator *bi)
193 {
194     delete (BreakIterator *)bi;
195 }
196 
197 U_CAPI void U_EXPORT2
ubrk_setText(UBreakIterator * bi,const UChar * text,int32_t textLength,UErrorCode * status)198 ubrk_setText(UBreakIterator* bi,
199              const UChar*    text,
200              int32_t         textLength,
201              UErrorCode*     status)
202 {
203     UText  ut = UTEXT_INITIALIZER;
204     utext_openUChars(&ut, text, textLength, status);
205     ((BreakIterator*)bi)->setText(&ut, *status);
206     // A stack allocated UText wrapping a UChar * string
207     //   can be dumped without explicitly closing it.
208 }
209 
210 
211 
212 U_CAPI void U_EXPORT2
ubrk_setUText(UBreakIterator * bi,UText * text,UErrorCode * status)213 ubrk_setUText(UBreakIterator *bi,
214              UText          *text,
215              UErrorCode     *status)
216 {
217   ((BreakIterator*)bi)->setText(text, *status);
218 }
219 
220 
221 
222 
223 
224 U_CAPI int32_t U_EXPORT2
ubrk_current(const UBreakIterator * bi)225 ubrk_current(const UBreakIterator *bi)
226 {
227 
228   return ((BreakIterator*)bi)->current();
229 }
230 
231 U_CAPI int32_t U_EXPORT2
ubrk_next(UBreakIterator * bi)232 ubrk_next(UBreakIterator *bi)
233 {
234 
235   return ((BreakIterator*)bi)->next();
236 }
237 
238 U_CAPI int32_t U_EXPORT2
ubrk_previous(UBreakIterator * bi)239 ubrk_previous(UBreakIterator *bi)
240 {
241 
242   return ((BreakIterator*)bi)->previous();
243 }
244 
245 U_CAPI int32_t U_EXPORT2
ubrk_first(UBreakIterator * bi)246 ubrk_first(UBreakIterator *bi)
247 {
248 
249   return ((BreakIterator*)bi)->first();
250 }
251 
252 U_CAPI int32_t U_EXPORT2
ubrk_last(UBreakIterator * bi)253 ubrk_last(UBreakIterator *bi)
254 {
255 
256   return ((BreakIterator*)bi)->last();
257 }
258 
259 U_CAPI int32_t U_EXPORT2
ubrk_preceding(UBreakIterator * bi,int32_t offset)260 ubrk_preceding(UBreakIterator *bi,
261            int32_t offset)
262 {
263 
264   return ((BreakIterator*)bi)->preceding(offset);
265 }
266 
267 U_CAPI int32_t U_EXPORT2
ubrk_following(UBreakIterator * bi,int32_t offset)268 ubrk_following(UBreakIterator *bi,
269            int32_t offset)
270 {
271 
272   return ((BreakIterator*)bi)->following(offset);
273 }
274 
275 U_CAPI const char* U_EXPORT2
ubrk_getAvailable(int32_t index)276 ubrk_getAvailable(int32_t index)
277 {
278 
279   return uloc_getAvailable(index);
280 }
281 
282 U_CAPI int32_t U_EXPORT2
ubrk_countAvailable()283 ubrk_countAvailable()
284 {
285 
286   return uloc_countAvailable();
287 }
288 
289 
290 U_CAPI  UBool U_EXPORT2
ubrk_isBoundary(UBreakIterator * bi,int32_t offset)291 ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
292 {
293     return ((BreakIterator*)bi)->isBoundary(offset);
294 }
295 
296 
297 U_CAPI  int32_t U_EXPORT2
ubrk_getRuleStatus(UBreakIterator * bi)298 ubrk_getRuleStatus(UBreakIterator *bi)
299 {
300     return ((BreakIterator*)bi)->getRuleStatus();
301 }
302 
303 U_CAPI  int32_t U_EXPORT2
ubrk_getRuleStatusVec(UBreakIterator * bi,int32_t * fillInVec,int32_t capacity,UErrorCode * status)304 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
305 {
306     return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
307 }
308 
309 
310 U_CAPI const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator * bi,ULocDataLocaleType type,UErrorCode * status)311 ubrk_getLocaleByType(const UBreakIterator *bi,
312                      ULocDataLocaleType type,
313                      UErrorCode* status)
314 {
315     if (bi == NULL) {
316         if (U_SUCCESS(*status)) {
317             *status = U_ILLEGAL_ARGUMENT_ERROR;
318         }
319         return NULL;
320     }
321     return ((BreakIterator*)bi)->getLocaleID(type, *status);
322 }
323 
324 
325 U_CAPI void U_EXPORT2
ubrk_refreshUText(UBreakIterator * bi,UText * text,UErrorCode * status)326 ubrk_refreshUText(UBreakIterator *bi,
327                        UText          *text,
328                        UErrorCode     *status)
329 {
330     BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
331     bii->refreshInputText(text, *status);
332 }
333 
334 U_CAPI int32_t U_EXPORT2
ubrk_getBinaryRules(UBreakIterator * bi,uint8_t * binaryRules,int32_t rulesCapacity,UErrorCode * status)335 ubrk_getBinaryRules(UBreakIterator *bi,
336                     uint8_t *       binaryRules, int32_t rulesCapacity,
337                     UErrorCode *    status)
338 {
339     if (U_FAILURE(*status)) {
340         return 0;
341     }
342     if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) {
343         *status = U_ILLEGAL_ARGUMENT_ERROR;
344         return 0;
345     }
346     RuleBasedBreakIterator* rbbi;
347     if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) {
348         *status = U_ILLEGAL_ARGUMENT_ERROR;
349         return 0;
350     }
351     uint32_t rulesLength;
352     const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
353     if (rulesLength > INT32_MAX) {
354         *status = U_INDEX_OUTOFBOUNDS_ERROR;
355         return 0;
356     }
357     if (binaryRules != NULL) { // if not preflighting
358         // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
359         if ((int32_t)rulesLength > rulesCapacity) {
360             *status = U_BUFFER_OVERFLOW_ERROR;
361         } else {
362             uprv_memcpy(binaryRules, returnedRules, rulesLength);
363         }
364     }
365     return (int32_t)rulesLength;
366 }
367 
368 
369 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
370