1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // file: rbbi_cache.cpp
5 
6 #include "unicode/utypes.h"
7 
8 #if !UCONFIG_NO_BREAK_ITERATION
9 
10 #include "unicode/ubrk.h"
11 #include "unicode/rbbi.h"
12 
13 #include "rbbi_cache.h"
14 
15 #include "brkeng.h"
16 #include "cmemory.h"
17 #include "rbbidata.h"
18 #include "rbbirb.h"
19 #include "uassert.h"
20 #include "uvectr32.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 /*
25  * DictionaryCache implementation
26  */
27 
DictionaryCache(RuleBasedBreakIterator * bi,UErrorCode & status)28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29         fBI(bi), fBreaks(status), fPositionInCache(-1),
30         fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31 }
32 
~DictionaryCache()33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34 }
35 
reset()36 void RuleBasedBreakIterator::DictionaryCache::reset() {
37     fPositionInCache = -1;
38     fStart = 0;
39     fLimit = 0;
40     fFirstRuleStatusIndex = 0;
41     fOtherRuleStatusIndex = 0;
42     fBreaks.removeAllElements();
43 }
44 
following(int32_t fromPos,int32_t * result,int32_t * statusIndex)45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46     if (fromPos >= fLimit || fromPos < fStart) {
47         fPositionInCache = -1;
48         return FALSE;
49     }
50 
51     // Sequential iteration, move from previous boundary to the following
52 
53     int32_t r = 0;
54     if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55         ++fPositionInCache;
56         if (fPositionInCache >= fBreaks.size()) {
57             fPositionInCache = -1;
58             return FALSE;
59         }
60         r = fBreaks.elementAti(fPositionInCache);
61         U_ASSERT(r > fromPos);
62         *result = r;
63         *statusIndex = fOtherRuleStatusIndex;
64         return TRUE;
65     }
66 
67     // Random indexing. Linear search for the boundary following the given position.
68 
69     for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70         r= fBreaks.elementAti(fPositionInCache);
71         if (r > fromPos) {
72             *result = r;
73             *statusIndex = fOtherRuleStatusIndex;
74             return TRUE;
75         }
76     }
77     UPRV_UNREACHABLE;
78 }
79 
80 
preceding(int32_t fromPos,int32_t * result,int32_t * statusIndex)81 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
82     if (fromPos <= fStart || fromPos > fLimit) {
83         fPositionInCache = -1;
84         return FALSE;
85     }
86 
87     if (fromPos == fLimit) {
88         fPositionInCache = fBreaks.size() - 1;
89         if (fPositionInCache >= 0) {
90             U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
91         }
92     }
93 
94     int32_t r;
95     if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
96         --fPositionInCache;
97         r = fBreaks.elementAti(fPositionInCache);
98         U_ASSERT(r < fromPos);
99         *result = r;
100         *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
101         return TRUE;
102     }
103 
104     if (fPositionInCache == 0) {
105         fPositionInCache = -1;
106         return FALSE;
107     }
108 
109     for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
110         r = fBreaks.elementAti(fPositionInCache);
111         if (r < fromPos) {
112             *result = r;
113             *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
114             return TRUE;
115         }
116     }
117     UPRV_UNREACHABLE;
118 }
119 
populateDictionary(int32_t startPos,int32_t endPos,int32_t firstRuleStatus,int32_t otherRuleStatus)120 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
121                                        int32_t firstRuleStatus, int32_t otherRuleStatus) {
122     if ((endPos - startPos) <= 1) {
123         return;
124     }
125 
126     reset();
127     fFirstRuleStatusIndex = firstRuleStatus;
128     fOtherRuleStatusIndex = otherRuleStatus;
129 
130     int32_t rangeStart = startPos;
131     int32_t rangeEnd = endPos;
132 
133     uint16_t    category;
134     int32_t     current;
135     UErrorCode  status = U_ZERO_ERROR;
136     int32_t     foundBreakCount = 0;
137     UText      *text = &fBI->fText;
138 
139     // Loop through the text, looking for ranges of dictionary characters.
140     // For each span, find the appropriate break engine, and ask it to find
141     // any breaks within the span.
142 
143     utext_setNativeIndex(text, rangeStart);
144     UChar32     c = utext_current32(text);
145     category = UTRIE2_GET16(fBI->fData->fTrie, c);
146 
147     while(U_SUCCESS(status)) {
148         while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
149             utext_next32(text);           // TODO: cleaner loop structure.
150             c = utext_current32(text);
151             category = UTRIE2_GET16(fBI->fData->fTrie, c);
152         }
153         if (current >= rangeEnd) {
154             break;
155         }
156 
157         // We now have a dictionary character. Get the appropriate language object
158         // to deal with it.
159         const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
160 
161         // Ask the language object if there are any breaks. It will add them to the cache and
162         // leave the text pointer on the other side of its range, ready to search for the next one.
163         if (lbe != NULL) {
164             foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
165         }
166 
167         // Reload the loop variables for the next go-round
168         c = utext_current32(text);
169         category = UTRIE2_GET16(fBI->fData->fTrie, c);
170     }
171 
172     // If we found breaks, ensure that the first and last entries are
173     // the original starting and ending position. And initialize the
174     // cache iteration position to the first entry.
175 
176     // printf("foundBreakCount = %d\n", foundBreakCount);
177     if (foundBreakCount > 0) {
178         U_ASSERT(foundBreakCount == fBreaks.size());
179         if (startPos < fBreaks.elementAti(0)) {
180             // The dictionary did not place a boundary at the start of the segment of text.
181             // Add one now. This should not commonly happen, but it would be easy for interactions
182             // of the rules for dictionary segments and the break engine implementations to
183             // inadvertently cause it. Cover it here, just in case.
184             fBreaks.insertElementAt(startPos, 0, status);
185         }
186         if (endPos > fBreaks.peeki()) {
187             fBreaks.push(endPos, status);
188         }
189         fPositionInCache = 0;
190         // Note: Dictionary matching may extend beyond the original limit.
191         fStart = fBreaks.elementAti(0);
192         fLimit = fBreaks.peeki();
193     } else {
194         // there were no language-based breaks, even though the segment contained
195         // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
196         // for this range will fail, and the calling code will fall back to the rule based boundaries.
197     }
198 }
199 
200 
201 /*
202  *   BreakCache implemetation
203  */
204 
BreakCache(RuleBasedBreakIterator * bi,UErrorCode & status)205 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
206         fBI(bi), fSideBuffer(status) {
207     reset();
208 }
209 
210 
~BreakCache()211 RuleBasedBreakIterator::BreakCache::~BreakCache() {
212 }
213 
214 
reset(int32_t pos,int32_t ruleStatus)215 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
216     fStartBufIdx = 0;
217     fEndBufIdx = 0;
218     fTextIdx = pos;
219     fBufIdx = 0;
220     fBoundaries[0] = pos;
221     fStatuses[0] = (uint16_t)ruleStatus;
222 }
223 
224 
current()225 int32_t  RuleBasedBreakIterator::BreakCache::current() {
226     fBI->fPosition = fTextIdx;
227     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
228     fBI->fDone = FALSE;
229     return fTextIdx;
230 }
231 
232 
following(int32_t startPos,UErrorCode & status)233 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
234     if (U_FAILURE(status)) {
235         return;
236     }
237     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
238         // startPos is in the cache. Do a next() from that position.
239         // TODO: an awkward set of interactions with bi->fDone
240         //       seek() does not clear it; it can't because of interactions with populateNear().
241         //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
242         //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
243         fBI->fDone = false;
244         next();
245     }
246     return;
247 }
248 
249 
preceding(int32_t startPos,UErrorCode & status)250 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
251     if (U_FAILURE(status)) {
252         return;
253     }
254     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
255         if (startPos == fTextIdx) {
256             previous(status);
257         } else {
258             // seek() leaves the BreakCache positioned at the preceding boundary
259             //        if the requested position is between two bounaries.
260             // current() pushes the BreakCache position out to the BreakIterator itself.
261             U_ASSERT(startPos > fTextIdx);
262             current();
263         }
264     }
265     return;
266 }
267 
268 
269 /*
270  * Out-of-line code for BreakCache::next().
271  * Cache does not already contain the boundary
272  */
nextOL()273 void RuleBasedBreakIterator::BreakCache::nextOL() {
274     fBI->fDone = !populateFollowing();
275     fBI->fPosition = fTextIdx;
276     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
277     return;
278 }
279 
280 
previous(UErrorCode & status)281 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
282     if (U_FAILURE(status)) {
283         return;
284     }
285     int32_t initialBufIdx = fBufIdx;
286     if (fBufIdx == fStartBufIdx) {
287         // At start of cache. Prepend to it.
288         populatePreceding(status);
289     } else {
290         // Cache already holds the next boundary
291         fBufIdx = modChunkSize(fBufIdx - 1);
292         fTextIdx = fBoundaries[fBufIdx];
293     }
294     fBI->fDone = (fBufIdx == initialBufIdx);
295     fBI->fPosition = fTextIdx;
296     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
297     return;
298 }
299 
300 
seek(int32_t pos)301 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
302     if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
303         return FALSE;
304     }
305     if (pos == fBoundaries[fStartBufIdx]) {
306         // Common case: seek(0), from BreakIterator::first()
307         fBufIdx = fStartBufIdx;
308         fTextIdx = fBoundaries[fBufIdx];
309         return TRUE;
310     }
311     if (pos == fBoundaries[fEndBufIdx]) {
312         fBufIdx = fEndBufIdx;
313         fTextIdx = fBoundaries[fBufIdx];
314         return TRUE;
315     }
316 
317     int32_t min = fStartBufIdx;
318     int32_t max = fEndBufIdx;
319     while (min != max) {
320         int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
321         probe = modChunkSize(probe);
322         if (fBoundaries[probe] > pos) {
323             max = probe;
324         } else {
325             min = modChunkSize(probe + 1);
326         }
327     }
328     U_ASSERT(fBoundaries[max] > pos);
329     fBufIdx = modChunkSize(max - 1);
330     fTextIdx = fBoundaries[fBufIdx];
331     U_ASSERT(fTextIdx <= pos);
332     return TRUE;
333 }
334 
335 
populateNear(int32_t position,UErrorCode & status)336 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
337     if (U_FAILURE(status)) {
338         return FALSE;
339     }
340     U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
341 
342     // Find a boundary somewhere in the vicinity of the requested position.
343     // Depending on the safe rules and the text data, it could be either before, at, or after
344     // the requested position.
345 
346 
347     // If the requested position is not near already cached positions, clear the existing cache,
348     // find a near-by boundary and begin new cache contents there.
349 
350     if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
351         int32_t aBoundary = 0;
352         int32_t ruleStatusIndex = 0;
353         if (position > 20) {
354             int32_t backupPos = fBI->handleSafePrevious(position);
355 
356             if (backupPos > 0) {
357                 // Advance to the boundary following the backup position.
358                 // There is a complication: the safe reverse rules identify pairs of code points
359                 // that are safe. If advancing from the safe point moves forwards by less than
360                 // two code points, we need to advance one more time to ensure that the boundary
361                 // is good, including a correct rules status value.
362                 //
363                 fBI->fPosition = backupPos;
364                 aBoundary = fBI->handleNext();
365                 if (aBoundary <= backupPos + 4) {
366                     // +4 is a quick test for possibly having advanced only one codepoint.
367                     // Four being the length of the longest potential code point, a supplementary in UTF-8
368                     utext_setNativeIndex(&fBI->fText, aBoundary);
369                     if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
370                         // The initial handleNext() only advanced by a single code point. Go again.
371                         aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
372                     }
373                 }
374                 ruleStatusIndex = fBI->fRuleStatusIndex;
375             }
376         }
377         reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
378     }
379 
380     // Fill in boundaries between existing cache content and the new requested position.
381 
382     if (fBoundaries[fEndBufIdx] < position) {
383         // The last position in the cache precedes the requested position.
384         // Add following position(s) to the cache.
385         while (fBoundaries[fEndBufIdx] < position) {
386             if (!populateFollowing()) {
387                 UPRV_UNREACHABLE;
388             }
389         }
390         fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
391         fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
392         while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
393             previous(status);
394         }
395         return true;
396     }
397 
398     if (fBoundaries[fStartBufIdx] > position) {
399         // The first position in the cache is beyond the requested position.
400         // back up more until we get a boundary <= the requested position.
401         while (fBoundaries[fStartBufIdx] > position) {
402             populatePreceding(status);
403         }
404         fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
405         fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
406         while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
407             next();
408         }
409         if (fTextIdx > position) {
410             // If position is not itself a boundary, the next() loop above will overshoot.
411             // Back up one, leaving cache position at the boundary preceding the requested position.
412             previous(status);
413         }
414         return true;
415     }
416 
417     U_ASSERT(fTextIdx == position);
418     return true;
419 }
420 
421 
422 
populateFollowing()423 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
424     int32_t fromPosition = fBoundaries[fEndBufIdx];
425     int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
426     int32_t pos = 0;
427     int32_t ruleStatusIdx = 0;
428 
429     if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
430         addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
431         return TRUE;
432     }
433 
434     fBI->fPosition = fromPosition;
435     pos = fBI->handleNext();
436     if (pos == UBRK_DONE) {
437         return FALSE;
438     }
439 
440     ruleStatusIdx = fBI->fRuleStatusIndex;
441     if (fBI->fDictionaryCharCount > 0) {
442         // The text segment obtained from the rules includes dictionary characters.
443         // Subdivide it, with subdivided results going into the dictionary cache.
444         fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
445         if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
446             addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
447             return TRUE;
448             // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
449             //       But be careful with interactions with populateNear().
450         }
451     }
452 
453     // Rule based segment did not include dictionary characters.
454     // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
455     //    meaning that we didn't take the return, above.
456     // Add its end point to the cache.
457     addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
458 
459     // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
460     //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
461     //
462     for (int count=0; count<6; ++count) {
463         pos = fBI->handleNext();
464         if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
465             break;
466         }
467         addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
468     }
469 
470     return TRUE;
471 }
472 
473 
populatePreceding(UErrorCode & status)474 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
475     if (U_FAILURE(status)) {
476         return FALSE;
477     }
478 
479     int32_t fromPosition = fBoundaries[fStartBufIdx];
480     if (fromPosition == 0) {
481         return FALSE;
482     }
483 
484     int32_t position = 0;
485     int32_t positionStatusIdx = 0;
486 
487     if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
488         addPreceding(position, positionStatusIdx, UpdateCachePosition);
489         return TRUE;
490     }
491 
492     int32_t backupPosition = fromPosition;
493 
494     // Find a boundary somewhere preceding the first already-cached boundary
495     do {
496         backupPosition = backupPosition - 30;
497         if (backupPosition <= 0) {
498             backupPosition = 0;
499         } else {
500             backupPosition = fBI->handleSafePrevious(backupPosition);
501         }
502         if (backupPosition == UBRK_DONE || backupPosition == 0) {
503             position = 0;
504             positionStatusIdx = 0;
505         } else {
506             // Advance to the boundary following the backup position.
507             // There is a complication: the safe reverse rules identify pairs of code points
508             // that are safe. If advancing from the safe point moves forwards by less than
509             // two code points, we need to advance one more time to ensure that the boundary
510             // is good, including a correct rules status value.
511             //
512             fBI->fPosition = backupPosition;
513             position = fBI->handleNext();
514             if (position <= backupPosition + 4) {
515                 // +4 is a quick test for possibly having advanced only one codepoint.
516                 // Four being the length of the longest potential code point, a supplementary in UTF-8
517                 utext_setNativeIndex(&fBI->fText, position);
518                 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
519                     // The initial handleNext() only advanced by a single code point. Go again.
520                     position = fBI->handleNext();   // Safe rules identify safe pairs.
521                 }
522             }
523             positionStatusIdx = fBI->fRuleStatusIndex;
524         }
525     } while (position >= fromPosition);
526 
527     // Find boundaries between the one we just located and the first already-cached boundary
528     // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
529 
530     fSideBuffer.removeAllElements();
531     fSideBuffer.addElement(position, status);
532     fSideBuffer.addElement(positionStatusIdx, status);
533 
534     do {
535         int32_t prevPosition = fBI->fPosition = position;
536         int32_t prevStatusIdx = positionStatusIdx;
537         position = fBI->handleNext();
538         positionStatusIdx = fBI->fRuleStatusIndex;
539         if (position == UBRK_DONE) {
540             break;
541         }
542 
543         UBool segmentHandledByDictionary = FALSE;
544         if (fBI->fDictionaryCharCount != 0) {
545             // Segment from the rules includes dictionary characters.
546             // Subdivide it, with subdivided results going into the dictionary cache.
547             int32_t dictSegEndPosition = position;
548             fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
549             while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
550                 segmentHandledByDictionary = true;
551                 U_ASSERT(position > prevPosition);
552                 if (position >= fromPosition) {
553                     break;
554                 }
555                 U_ASSERT(position <= dictSegEndPosition);
556                 fSideBuffer.addElement(position, status);
557                 fSideBuffer.addElement(positionStatusIdx, status);
558                 prevPosition = position;
559             }
560             U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
561         }
562 
563         if (!segmentHandledByDictionary && position < fromPosition) {
564             fSideBuffer.addElement(position, status);
565             fSideBuffer.addElement(positionStatusIdx, status);
566         }
567     } while (position < fromPosition);
568 
569     // Move boundaries from the side buffer to the main circular buffer.
570     UBool success = FALSE;
571     if (!fSideBuffer.isEmpty()) {
572         positionStatusIdx = fSideBuffer.popi();
573         position = fSideBuffer.popi();
574         addPreceding(position, positionStatusIdx, UpdateCachePosition);
575         success = TRUE;
576     }
577 
578     while (!fSideBuffer.isEmpty()) {
579         positionStatusIdx = fSideBuffer.popi();
580         position = fSideBuffer.popi();
581         if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
582             // No space in circular buffer to hold a new preceding result while
583             // also retaining the current cache (iteration) position.
584             // Bailing out is safe; the cache will refill again if needed.
585             break;
586         }
587     }
588 
589     return success;
590 }
591 
592 
addFollowing(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)593 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
594     U_ASSERT(position > fBoundaries[fEndBufIdx]);
595     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
596     int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
597     if (nextIdx == fStartBufIdx) {
598         fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
599     }
600     fBoundaries[nextIdx] = position;
601     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
602     fEndBufIdx = nextIdx;
603     if (update == UpdateCachePosition) {
604         // Set current position to the newly added boundary.
605         fBufIdx = nextIdx;
606         fTextIdx = position;
607     } else {
608         // Retaining the original cache position.
609         // Check if the added boundary wraps around the buffer, and would over-write the original position.
610         // It's the responsibility of callers of this function to not add too many.
611         U_ASSERT(nextIdx != fBufIdx);
612     }
613 }
614 
addPreceding(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)615 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
616     U_ASSERT(position < fBoundaries[fStartBufIdx]);
617     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
618     int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
619     if (nextIdx == fEndBufIdx) {
620         if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
621             // Failure. The insertion of the new boundary would claim the buffer position that is the
622             // current iteration position. And we also want to retain the current iteration position.
623             // (The buffer is already completely full of entries that precede the iteration position.)
624             return false;
625         }
626         fEndBufIdx = modChunkSize(fEndBufIdx - 1);
627     }
628     fBoundaries[nextIdx] = position;
629     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
630     fStartBufIdx = nextIdx;
631     if (update == UpdateCachePosition) {
632         fBufIdx = nextIdx;
633         fTextIdx = position;
634     }
635     return true;
636 }
637 
638 
dumpCache()639 void RuleBasedBreakIterator::BreakCache::dumpCache() {
640 #ifdef RBBI_DEBUG
641     RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
642     for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
643         RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
644         if (i == fEndBufIdx) {
645             break;
646         }
647     }
648 #endif
649 }
650 
651 U_NAMESPACE_END
652 
653 #endif // #if !UCONFIG_NO_BREAK_ITERATION
654