1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  rbbisetb.cpp
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
9 *   and others. All rights reserved.                                      *
10 ***************************************************************************
11 */
12 //
13 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
14 //                   (part of the rule building process.)
15 //
16 //      Starting with the rules parse tree from the scanner,
17 //
18 //                   -  Enumerate the set of UnicodeSets that are referenced
19 //                      by the RBBI rules.
20 //                   -  compute a set of non-overlapping character ranges
21 //                      with all characters within a range belonging to the same
22 //                      set of input unicode sets.
23 //                   -  Derive a set of non-overlapping UnicodeSet (like things)
24 //                      that will correspond to columns in the state table for
25 //                      the RBBI execution engine.  All characters within one
26 //                      of these sets belong to the same set of the original
27 //                      UnicodeSets from the user's rules.
//                   -  construct the trie table that maps each input character
//                      to the index of the matching non-overlapping set from
//                      the previous step.
31 //
32 
33 #include "unicode/utypes.h"
34 
35 #if !UCONFIG_NO_BREAK_ITERATION
36 
37 #include "unicode/uniset.h"
38 #include "uvector.h"
39 #include "uassert.h"
40 #include "cmemory.h"
41 #include "cstring.h"
42 
43 #include "rbbisetb.h"
44 #include "rbbinode.h"
45 
46 U_NAMESPACE_BEGIN
47 
// Upper bound on the number of character categories for which getTrieSize()
// builds the trie with 8-bit values; above it, 16-bit values are used.
constexpr int32_t kMaxCharCategoriesFor8BitsTrie {255};
49 //------------------------------------------------------------------------
50 //
51 //   Constructor
52 //
53 //------------------------------------------------------------------------
RBBISetBuilder(RBBIRuleBuilder * rb)54 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
55 {
56     fRB             = rb;
57     fStatus         = rb->fStatus;
58     fRangeList      = nullptr;
59     fMutableTrie    = nullptr;
60     fTrie           = nullptr;
61     fTrieSize       = 0;
62     fGroupCount     = 0;
63     fSawBOF         = false;
64 }
65 
66 
67 //------------------------------------------------------------------------
68 //
69 //   Destructor
70 //
71 //------------------------------------------------------------------------
~RBBISetBuilder()72 RBBISetBuilder::~RBBISetBuilder()
73 {
74     RangeDescriptor   *nextRangeDesc;
75 
76     // Walk through & delete the linked list of RangeDescriptors
77     for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
78         RangeDescriptor *r = nextRangeDesc;
79         nextRangeDesc      = r->fNext;
80         delete r;
81     }
82 
83     ucptrie_close(fTrie);
84     umutablecptrie_close(fMutableTrie);
85 }
86 
87 
88 
89 
90 //------------------------------------------------------------------------
91 //
92 //   build          Build the list of non-overlapping character ranges
93 //                  from the Unicode Sets.
94 //
95 //------------------------------------------------------------------------
buildRanges()96 void RBBISetBuilder::buildRanges() {
97     RBBINode        *usetNode;
98     RangeDescriptor *rlRange;
99 
100     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
101 
102     //
103     //  Initialize the process by creating a single range encompassing all characters
104     //  that is in no sets.
105     //
106     fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
107     if (fRangeList == NULL) {
108         *fStatus = U_MEMORY_ALLOCATION_ERROR;
109         return;
110     }
111     fRangeList->fStartChar    = 0;
112     fRangeList->fEndChar      = 0x10ffff;
113 
114     if (U_FAILURE(*fStatus)) {
115         return;
116     }
117 
118     //
119     //  Find the set of non-overlapping ranges of characters
120     //
121     int  ni;
122     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
123         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
124         if (usetNode==NULL) {
125             break;
126         }
127 
128         UnicodeSet      *inputSet             = usetNode->fInputSet;
129         int32_t          inputSetRangeCount   = inputSet->getRangeCount();
130         int              inputSetRangeIndex   = 0;
131                          rlRange              = fRangeList;
132 
133         for (;;) {
134             if (inputSetRangeIndex >= inputSetRangeCount) {
135                 break;
136             }
137             UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
138             UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
139 
140             // skip over ranges from the range list that are completely
141             //   below the current range from the input unicode set.
142             while (rlRange->fEndChar < inputSetRangeBegin) {
143                 rlRange = rlRange->fNext;
144             }
145 
146             // If the start of the range from the range list is before with
147             //   the start of the range from the unicode set, split the range list range
148             //   in two, with one part being before (wholly outside of) the unicode set
149             //   and the other containing the rest.
150             //   Then continue the loop; the post-split current range will then be skipped
151             //     over
152             if (rlRange->fStartChar < inputSetRangeBegin) {
153                 rlRange->split(inputSetRangeBegin, *fStatus);
154                 if (U_FAILURE(*fStatus)) {
155                     return;
156                 }
157                 continue;
158             }
159 
160             // Same thing at the end of the ranges...
161             // If the end of the range from the range list doesn't coincide with
162             //   the end of the range from the unicode set, split the range list
163             //   range in two.  The first part of the split range will be
164             //   wholly inside the Unicode set.
165             if (rlRange->fEndChar > inputSetRangeEnd) {
166                 rlRange->split(inputSetRangeEnd+1, *fStatus);
167                 if (U_FAILURE(*fStatus)) {
168                     return;
169                 }
170             }
171 
172             // The current rlRange is now entirely within the UnicodeSet range.
173             // Add this unicode set to the list of sets for this rlRange
174             if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
175                 rlRange->fIncludesSets->addElement(usetNode, *fStatus);
176                 if (U_FAILURE(*fStatus)) {
177                     return;
178                 }
179             }
180 
181             // Advance over ranges that we are finished with.
182             if (inputSetRangeEnd == rlRange->fEndChar) {
183                 inputSetRangeIndex++;
184             }
185             rlRange = rlRange->fNext;
186         }
187     }
188 
189     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
190 
191     //
192     //  Group the above ranges, with each group consisting of one or more
193     //    ranges that are in exactly the same set of original UnicodeSets.
194     //    The groups are numbered, and these group numbers are the set of
195     //    input symbols recognized by the run-time state machine.
196     //
197     //    Numbering: # 0  (state table column 0) is unused.
198     //               # 1  is reserved - table column 1 is for end-of-input
199     //               # 2  is reserved - table column 2 is for beginning-of-input
200     //               # 3  is the first range list.
201     //
202     RangeDescriptor *rlSearchRange;
203     int32_t dictGroupCount = 0;
204 
205     for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
206         for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
207             if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
208                 rlRange->fNum = rlSearchRange->fNum;
209                 rlRange->fIncludesDict = rlSearchRange->fIncludesDict;
210                 break;
211             }
212         }
213         if (rlRange->fNum == 0) {
214             rlRange->fFirstInGroup = true;
215             if (rlRange->isDictionaryRange()) {
216                 rlRange->fNum = ++dictGroupCount;
217                 rlRange->fIncludesDict = true;
218             } else {
219                 fGroupCount++;
220                 rlRange->fNum = fGroupCount+2;
221                 addValToSets(rlRange->fIncludesSets, rlRange->fNum);
222             }
223         }
224     }
225 
226     // Move the character category numbers for any dictionary ranges up, so that they
227     // immediately follow the non-dictionary ranges.
228 
229     fDictCategoriesStart = fGroupCount + 3;
230     for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
231         if (rlRange->fIncludesDict) {
232             rlRange->fNum += fDictCategoriesStart - 1;
233             if (rlRange->fFirstInGroup) {
234                 addValToSets(rlRange->fIncludesSets, rlRange->fNum);
235             }
236         }
237     }
238     fGroupCount += dictGroupCount;
239 
240 
241     // Handle input sets that contain the special string {eof}.
242     //   Column 1 of the state table is reserved for EOF on input.
243     //   Column 2 is reserved for before-the-start-input.
244     //            (This column can be optimized away later if there are no rule
245     //             references to {bof}.)
246     //   Add this column value (1 or 2) to the equivalent expression
247     //     subtree for each UnicodeSet that contains the string {eof}
248     //   Because {bof} and {eof} are not characters in the normal sense,
249     //   they don't affect the computation of the ranges or TRIE.
250 
251     UnicodeString eofString(u"eof");
252     UnicodeString bofString(u"bof");
253     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
254         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
255         if (usetNode==NULL) {
256             break;
257         }
258         UnicodeSet      *inputSet = usetNode->fInputSet;
259         if (inputSet->contains(eofString)) {
260             addValToSet(usetNode, 1);
261         }
262         if (inputSet->contains(bofString)) {
263             addValToSet(usetNode, 2);
264             fSawBOF = TRUE;
265         }
266     }
267 
268 
269     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
270     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
271 }
272 
273 
274 //
275 // Build the Trie table for mapping UChar32 values to the corresponding
276 // range group number.
277 //
buildTrie()278 void RBBISetBuilder::buildTrie() {
279     fMutableTrie = umutablecptrie_open(
280                         0,       //  Initial value for all code points.
281                         0,       //  Error value for out-of-range input.
282                         fStatus);
283 
284     for (RangeDescriptor *range = fRangeList; range!=nullptr && U_SUCCESS(*fStatus); range=range->fNext) {
285         umutablecptrie_setRange(fMutableTrie,
286                                 range->fStartChar,     // Range start
287                                 range->fEndChar,       // Range end (inclusive)
288                                 range->fNum,           // value for range
289                                 fStatus);
290     }
291 }
292 
293 
mergeCategories(IntPair categories)294 void RBBISetBuilder::mergeCategories(IntPair categories) {
295     U_ASSERT(categories.first >= 1);
296     U_ASSERT(categories.second > categories.first);
297     U_ASSERT((categories.first <  fDictCategoriesStart && categories.second <  fDictCategoriesStart) ||
298              (categories.first >= fDictCategoriesStart && categories.second >= fDictCategoriesStart));
299 
300     for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
301         int32_t rangeNum = rd->fNum;
302         if (rangeNum == categories.second) {
303             rd->fNum = categories.first;
304         } else if (rangeNum > categories.second) {
305             rd->fNum--;
306         }
307     }
308     --fGroupCount;
309     if (categories.second <= fDictCategoriesStart) {
310         --fDictCategoriesStart;
311     }
312 }
313 
314 
315 //-----------------------------------------------------------------------------------
316 //
317 //  getTrieSize()    Return the size that will be required to serialize the Trie.
318 //
319 //-----------------------------------------------------------------------------------
getTrieSize()320 int32_t RBBISetBuilder::getTrieSize()  {
321     if (U_FAILURE(*fStatus)) {
322         return 0;
323     }
324     if (fTrie == nullptr) {
325         bool use8Bits = getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie;
326         fTrie = umutablecptrie_buildImmutable(
327             fMutableTrie,
328             UCPTRIE_TYPE_FAST,
329             use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16,
330             fStatus);
331         fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus);
332         if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
333             *fStatus = U_ZERO_ERROR;
334         }
335     }
336     return fTrieSize;
337 }
338 
339 
340 //-----------------------------------------------------------------------------------
341 //
342 //  serializeTrie()   Put the serialized trie at the specified address.
343 //                    Trust the caller to have given us enough memory.
344 //                    getTrieSize() MUST be called first.
345 //
346 //-----------------------------------------------------------------------------------
serializeTrie(uint8_t * where)347 void RBBISetBuilder::serializeTrie(uint8_t *where) {
348     ucptrie_toBinary(fTrie,
349                      where,                // Buffer
350                      fTrieSize,            // Capacity
351                      fStatus);
352 }
353 
354 //------------------------------------------------------------------------
355 //
356 //  addValToSets     Add a runtime-mapped input value to each uset from a
357 //                   list of uset nodes. (val corresponds to a state table column.)
358 //                   For each of the original Unicode sets - which correspond
359 //                   directly to uset nodes - a logically equivalent expression
360 //                   is constructed in terms of the remapped runtime input
361 //                   symbol set.  This function adds one runtime input symbol to
362 //                   a list of sets.
363 //
364 //                   The "logically equivalent expression" is the tree for an
365 //                   or-ing together of all of the symbols that go into the set.
366 //
367 //------------------------------------------------------------------------
addValToSets(UVector * sets,uint32_t val)368 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
369     int32_t       ix;
370 
371     for (ix=0; ix<sets->size(); ix++) {
372         RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
373         addValToSet(usetNode, val);
374     }
375 }
376 
addValToSet(RBBINode * usetNode,uint32_t val)377 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
378     RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
379     if (leafNode == NULL) {
380         *fStatus = U_MEMORY_ALLOCATION_ERROR;
381         return;
382     }
383     leafNode->fVal = (unsigned short)val;
384     if (usetNode->fLeftChild == NULL) {
385         usetNode->fLeftChild = leafNode;
386         leafNode->fParent    = usetNode;
387     } else {
388         // There are already input symbols present for this set.
389         // Set up an OR node, with the previous stuff as the left child
390         //   and the new value as the right child.
391         RBBINode *orNode = new RBBINode(RBBINode::opOr);
392         if (orNode == NULL) {
393             *fStatus = U_MEMORY_ALLOCATION_ERROR;
394             return;
395         }
396         orNode->fLeftChild  = usetNode->fLeftChild;
397         orNode->fRightChild = leafNode;
398         orNode->fLeftChild->fParent  = orNode;
399         orNode->fRightChild->fParent = orNode;
400         usetNode->fLeftChild = orNode;
401         orNode->fParent = usetNode;
402     }
403 }
404 
405 
406 //------------------------------------------------------------------------
407 //
408 //   getNumCharCategories
409 //
410 //------------------------------------------------------------------------
getNumCharCategories() const411 int32_t  RBBISetBuilder::getNumCharCategories() const {
412     return fGroupCount + 3;
413 }
414 
415 
416 //------------------------------------------------------------------------
417 //
418 //   getDictCategoriesStart
419 //
420 //------------------------------------------------------------------------
getDictCategoriesStart() const421 int32_t  RBBISetBuilder::getDictCategoriesStart() const {
422     return fDictCategoriesStart;
423 }
424 
425 
426 //------------------------------------------------------------------------
427 //
428 //   sawBOF
429 //
430 //------------------------------------------------------------------------
sawBOF() const431 UBool  RBBISetBuilder::sawBOF() const {
432     return fSawBOF;
433 }
434 
435 
436 //------------------------------------------------------------------------
437 //
438 //   getFirstChar      Given a runtime RBBI character category, find
439 //                     the first UChar32 that is in the set of chars
440 //                     in the category.
441 //------------------------------------------------------------------------
getFirstChar(int32_t category) const442 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
443     RangeDescriptor   *rlRange;
444     UChar32            retVal = (UChar32)-1;
445     for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
446         if (rlRange->fNum == category) {
447             retVal = rlRange->fStartChar;
448             break;
449         }
450     }
451     return retVal;
452 }
453 
454 
//------------------------------------------------------------------------
//
//   printRanges        A debugging function.
//                      Prints every range in the range list, followed by
//                      the names of the sets that include it.  A set that
//                      is not reached through a variable reference is
//                      shown as "anon".
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printRanges() {
    RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");

    for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
        RBBIDebugPrintf("%4x-%4x  ", rd->fStartChar, rd->fEndChar);

        for (int setIx = 0; setIx < rd->fIncludesSets->size(); ++setIx) {
            RBBINode *usetNode = static_cast<RBBINode *>(rd->fIncludesSets->elementAt(setIx));

            // The name comes from the grandparent node when that node is a varRef.
            UnicodeString setName(u"anon");
            RBBINode *setRef = usetNode->fParent;
            RBBINode *varRef = (setRef != nullptr) ? setRef->fParent : nullptr;
            if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
                setName = varRef->fText;
            }

            RBBI_DEBUG_printUnicodeString(setName);
            RBBIDebugPrintf("  ");
        }
        RBBIDebugPrintf("\n");
    }
}
#endif
486 
487 
//------------------------------------------------------------------------
//
//   printRangeGroups     A debugging function.
//                        For each range that is first in its group, prints
//                        the group number, a <DICT> marker for dictionary
//                        categories, the names of the group's sets, and
//                        then every range with that group number, five
//                        per line.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printRangeGroups() {
    RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");

    for (RangeDescriptor *groupHead = fRangeList; groupHead != nullptr; groupHead = groupHead->fNext) {
        if (!groupHead->fFirstInGroup) {
            continue;
        }

        int groupNum = groupHead->fNum;
        RBBIDebugPrintf("%2i  ", groupNum);

        if (groupNum >= fDictCategoriesStart) {
            RBBIDebugPrintf(" <DICT> ");
        }

        for (int setIx = 0; setIx < groupHead->fIncludesSets->size(); ++setIx) {
            RBBINode *usetNode = static_cast<RBBINode *>(groupHead->fIncludesSets->elementAt(setIx));

            // The name comes from the grandparent node when that node is a varRef.
            UnicodeString setName = UNICODE_STRING("anon", 4);
            RBBINode *setRef = usetNode->fParent;
            RBBINode *varRef = (setRef != nullptr) ? setRef->fParent : nullptr;
            if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
                setName = varRef->fText;
            }

            RBBI_DEBUG_printUnicodeString(setName);
            RBBIDebugPrintf(" ");
        }

        // List the member ranges, starting a new line before every fifth one.
        int printed = 0;
        for (RangeDescriptor *member = groupHead; member != nullptr; member = member->fNext) {
            if (member->fNum != groupHead->fNum) {
                continue;
            }
            if (printed % 5 == 0) {
                RBBIDebugPrintf("\n    ");
            }
            ++printed;
            RBBIDebugPrintf("  %05x-%05x", member->fStartChar, member->fEndChar);
        }
        RBBIDebugPrintf("\n");
    }
    RBBIDebugPrintf("\n");
}
#endif
534 
535 
//------------------------------------------------------------------------
//
//   printSets          A debugging function.
//                      Prints each uset node from the rule builder with
//                      its index, its name ("anonymous" when it is not
//                      reached through a variable reference), its text,
//                      and its expression subtree if one has been built.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printSets() {
    RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");

    // elementAt() yielding null marks the end of the node list.
    int index = 0;
    for (;;) {
        RBBINode *usetNode = static_cast<RBBINode *>(fRB->fUSetNodes->elementAt(index));
        if (usetNode == nullptr) {
            break;
        }

        RBBIDebugPrintf("%3d    ", index);

        // The name comes from the grandparent node when that node is a varRef.
        UnicodeString setName = UNICODE_STRING("anonymous", 9);
        RBBINode *setRef = usetNode->fParent;
        RBBINode *varRef = (setRef != nullptr) ? setRef->fParent : nullptr;
        if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
            setName = varRef->fText;
        }

        RBBI_DEBUG_printUnicodeString(setName);
        RBBIDebugPrintf("   ");
        RBBI_DEBUG_printUnicodeString(usetNode->fText);
        RBBIDebugPrintf("\n");

        if (usetNode->fLeftChild != nullptr) {
            RBBINode::printTree(usetNode->fLeftChild, true);
        }
        ++index;
    }
    RBBIDebugPrintf("\n");
}
#endif
578 
579 
580 
581 //-------------------------------------------------------------------------------------
582 //
583 //  RangeDescriptor copy constructor
584 //
585 //-------------------------------------------------------------------------------------
586 
RangeDescriptor(const RangeDescriptor & other,UErrorCode & status)587 RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) :
588         fStartChar(other.fStartChar), fEndChar {other.fEndChar}, fNum {other.fNum},
589         fIncludesDict{other.fIncludesDict}, fFirstInGroup{other.fFirstInGroup} {
590 
591     if (U_FAILURE(status)) {
592         return;
593     }
594     fIncludesSets = new UVector(status);
595     if (this->fIncludesSets == nullptr) {
596         status = U_MEMORY_ALLOCATION_ERROR;
597     }
598     if (U_FAILURE(status)) {
599         return;
600     }
601 
602     for (int32_t i=0; i<other.fIncludesSets->size(); i++) {
603         this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
604     }
605 }
606 
607 
608 //-------------------------------------------------------------------------------------
609 //
610 //  RangeDesriptor default constructor
611 //
612 //-------------------------------------------------------------------------------------
RangeDescriptor(UErrorCode & status)613 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
614     if (U_FAILURE(status)) {
615         return;
616     }
617     fIncludesSets = new UVector(status);
618     if (fIncludesSets == nullptr) {
619         status = U_MEMORY_ALLOCATION_ERROR;
620     }
621 }
622 
623 
624 //-------------------------------------------------------------------------------------
625 //
626 //  RangeDesriptor Destructor
627 //
628 //-------------------------------------------------------------------------------------
~RangeDescriptor()629 RangeDescriptor::~RangeDescriptor() {
630     delete  fIncludesSets;
631     fIncludesSets = nullptr;
632 }
633 
634 //-------------------------------------------------------------------------------------
635 //
636 //  RangeDesriptor::split()
637 //
638 //-------------------------------------------------------------------------------------
split(UChar32 where,UErrorCode & status)639 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
640     U_ASSERT(where>fStartChar && where<=fEndChar);
641     RangeDescriptor *nr = new RangeDescriptor(*this, status);
642     if(nr == nullptr) {
643         status = U_MEMORY_ALLOCATION_ERROR;
644         return;
645     }
646     if (U_FAILURE(status)) {
647         delete nr;
648         return;
649     }
650     //  RangeDescriptor copy constructor copies all fields.
651     //  Only need to update those that are different after the split.
652     nr->fStartChar = where;
653     this->fEndChar = where-1;
654     nr->fNext      = this->fNext;
655     this->fNext    = nr;
656 }
657 
658 
659 //-------------------------------------------------------------------------------------
660 //
661 //   RangeDescriptor::isDictionaryRange
662 //
663 //            Test whether this range includes characters from
664 //            the original Unicode Set named "dictionary".
665 //
666 //            This function looks through the Unicode Sets that
667 //            the range includes, checking for one named "dictionary"
668 //
669 //            TODO:  a faster way would be to find the set node for
670 //                   "dictionary" just once, rather than looking it
671 //                   up by name every time.
672 //
673 //-------------------------------------------------------------------------------------
isDictionaryRange()674 bool RangeDescriptor::isDictionaryRange() {
675     static const char16_t *dictionary = u"dictionary";
676     for (int32_t i=0; i<fIncludesSets->size(); i++) {
677         RBBINode *usetNode  = (RBBINode *)fIncludesSets->elementAt(i);
678         RBBINode *setRef = usetNode->fParent;
679         if (setRef != nullptr) {
680             RBBINode *varRef = setRef->fParent;
681             if (varRef && varRef->fType == RBBINode::varRef) {
682                 const UnicodeString *setName = &varRef->fText;
683                 if (setName->compare(dictionary, -1) == 0) {
684                     return true;
685                 }
686             }
687         }
688     }
689     return false;
690 }
691 
692 U_NAMESPACE_END
693 
694 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
695