1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_COLLATION
13 
14 #include "unicode/alphaindex.h"
15 #include "unicode/coll.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/normalizer2.h"
18 #include "unicode/tblcoll.h"
19 #include "unicode/uchar.h"
20 #include "unicode/ulocdata.h"
21 #include "unicode/uniset.h"
22 #include "unicode/uobject.h"
23 #include "unicode/usetiter.h"
24 #include "unicode/utf16.h"
25 
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "uassert.h"
29 #include "uvector.h"
30 #include "uvectr64.h"
31 
32 //#include <string>
33 //#include <iostream>
34 
35 U_NAMESPACE_BEGIN
36 
37 namespace {
38 
39 /**
40  * Prefix string for Chinese index buckets.
41  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42  */
43 const UChar BASE[1] = { 0xFDD0 };
44 const int32_t BASE_LENGTH = 1;
45 
46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47                                 const UnicodeString &one, const UnicodeString &other);
48 
49 }  // namespace
50 
51 static int32_t U_CALLCONV
52 collatorComparator(const void *context, const void *left, const void *right);
53 
54 static int32_t U_CALLCONV
55 recordCompareFn(const void *context, const void *left, const void *right);
56 
57 //  UVector<Record *> support function, delete a Record.
58 static void U_CALLCONV
alphaIndex_deleteRecord(void * obj)59 alphaIndex_deleteRecord(void *obj) {
60     delete static_cast<AlphabeticIndex::Record *>(obj);
61 }
62 
63 namespace {
64 
ownedString(const UnicodeString & s,LocalPointer<UnicodeString> & owned,UErrorCode & errorCode)65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66                            UErrorCode &errorCode) {
67     if (U_FAILURE(errorCode)) { return NULL; }
68     if (owned.isValid()) {
69         return owned.orphan();
70     }
71     UnicodeString *p = new UnicodeString(s);
72     if (p == NULL) {
73         errorCode = U_MEMORY_ALLOCATION_ERROR;
74     }
75     return p;
76 }
77 
getString(const UVector & list,int32_t i)78 inline UnicodeString *getString(const UVector &list, int32_t i) {
79     return static_cast<UnicodeString *>(list[i]);
80 }
81 
getBucket(const UVector & list,int32_t i)82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84 }
85 
getRecord(const UVector & list,int32_t i)86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87     return static_cast<AlphabeticIndex::Record *>(list[i]);
88 }
89 
90 /**
91  * Like Java Collections.binarySearch(List, String, Comparator).
92  *
93  * @return the index>=0 where the item was found,
94  *         or the index<0 for inserting the string at ~index in sorted order
95  */
binarySearch(const UVector & list,const UnicodeString & s,const Collator & coll)96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97     if (list.size() == 0) { return ~0; }
98     int32_t start = 0;
99     int32_t limit = list.size();
100     for (;;) {
101         int32_t i = (start + limit) / 2;
102         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103         UErrorCode errorCode = U_ZERO_ERROR;
104         UCollationResult cmp = coll.compare(s, *si, errorCode);
105         if (cmp == UCOL_EQUAL) {
106             return i;
107         } else if (cmp < 0) {
108             if (i == start) {
109                 return ~start;  // insert s before *si
110             }
111             limit = i;
112         } else {
113             if (i == start) {
114                 return ~(start + 1);  // insert s after *si
115             }
116             start = i;
117         }
118     }
119 }
120 
121 }  // namespace
122 
123 // The BucketList is not in the anonymous namespace because only Clang
124 // seems to support its use in other classes from there.
125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126 class BucketList : public UObject {
127 public:
BucketList(UVector * bucketList,UVector * publicBucketList)128     BucketList(UVector *bucketList, UVector *publicBucketList)
129             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130         int32_t displayIndex = 0;
131         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133         }
134     }
135 
136     // The virtual destructor must not be inline.
137     // See ticket #8454 for details.
138     virtual ~BucketList();
139 
getBucketCount() const140     int32_t getBucketCount() const {
141         return immutableVisibleList_->size();
142     }
143 
getBucketIndex(const UnicodeString & name,const Collator & collatorPrimaryOnly,UErrorCode & errorCode)144     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145                            UErrorCode &errorCode) {
146         // binary search
147         int32_t start = 0;
148         int32_t limit = bucketList_->size();
149         while ((start + 1) < limit) {
150             int32_t i = (start + limit) / 2;
151             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152             UCollationResult nameVsBucket =
153                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154             if (nameVsBucket < 0) {
155                 limit = i;
156             } else {
157                 start = i;
158             }
159         }
160         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161         if (bucket->displayBucket_ != NULL) {
162             bucket = bucket->displayBucket_;
163         }
164         return bucket->displayIndex_;
165     }
166 
167     /** All of the buckets, visible and invisible. */
168     UVector *bucketList_;
169     /** Just the visible buckets. */
170     UVector *immutableVisibleList_;
171 };
172 
~BucketList()173 BucketList::~BucketList() {
174     delete bucketList_;
175     if (immutableVisibleList_ != bucketList_) {
176         delete immutableVisibleList_;
177     }
178 }
179 
~ImmutableIndex()180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181     delete buckets_;
182     delete collatorPrimaryOnly_;
183 }
184 
185 int32_t
getBucketCount() const186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187     return buckets_->getBucketCount();
188 }
189 
190 int32_t
getBucketIndex(const UnicodeString & name,UErrorCode & errorCode) const191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
192         const UnicodeString &name, UErrorCode &errorCode) const {
193     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194 }
195 
196 const AlphabeticIndex::Bucket *
getBucket(int32_t index) const197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198     if (0 <= index && index < buckets_->getBucketCount()) {
199         return icu::getBucket(*buckets_->immutableVisibleList_, index);
200     } else {
201         return NULL;
202     }
203 }
204 
AlphabeticIndex(const Locale & locale,UErrorCode & status)205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206         : inputList_(NULL),
207           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208           maxLabelCount_(99),
209           initialLabels_(NULL), firstCharsInScripts_(NULL),
210           collator_(NULL), collatorPrimaryOnly_(NULL),
211           buckets_(NULL) {
212     init(&locale, status);
213 }
214 
215 
AlphabeticIndex(RuleBasedCollator * collator,UErrorCode & status)216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217         : inputList_(NULL),
218           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219           maxLabelCount_(99),
220           initialLabels_(NULL), firstCharsInScripts_(NULL),
221           collator_(collator), collatorPrimaryOnly_(NULL),
222           buckets_(NULL) {
223     init(NULL, status);
224 }
225 
226 
227 
~AlphabeticIndex()228 AlphabeticIndex::~AlphabeticIndex() {
229     delete collator_;
230     delete collatorPrimaryOnly_;
231     delete firstCharsInScripts_;
232     delete buckets_;
233     delete inputList_;
234     delete initialLabels_;
235 }
236 
237 
addLabels(const UnicodeSet & additions,UErrorCode & status)238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239     if (U_FAILURE(status)) {
240         return *this;
241     }
242     initialLabels_->addAll(additions);
243     clearBuckets();
244     return *this;
245 }
246 
247 
addLabels(const Locale & locale,UErrorCode & status)248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249     addIndexExemplars(locale, status);
250     clearBuckets();
251     return *this;
252 }
253 
254 
buildImmutableIndex(UErrorCode & errorCode)255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256     if (U_FAILURE(errorCode)) { return NULL; }
257     // In C++, the ImmutableIndex must own its copy of the BucketList,
258     // even if it contains no records, for proper memory management.
259     // We could clone the buckets_ if they are not NULL,
260     // but that would be worth it only if this method is called multiple times,
261     // or called after using the old-style bucket iterator API.
262     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263     LocalPointer<RuleBasedCollator> coll(collatorPrimaryOnly_->clone());
264     if (immutableBucketList.isNull() || coll.isNull()) {
265         errorCode = U_MEMORY_ALLOCATION_ERROR;
266         return NULL;
267     }
268     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
269     if (immIndex == NULL) {
270         errorCode = U_MEMORY_ALLOCATION_ERROR;
271         return NULL;
272     }
273     // The ImmutableIndex adopted its parameter objects.
274     immutableBucketList.orphan();
275     coll.orphan();
276     return immIndex;
277 }
278 
getBucketCount(UErrorCode & status)279 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
280     initBuckets(status);
281     if (U_FAILURE(status)) {
282         return 0;
283     }
284     return buckets_->getBucketCount();
285 }
286 
287 
getRecordCount(UErrorCode & status)288 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
289     if (U_FAILURE(status) || inputList_ == NULL) {
290         return 0;
291     }
292     return inputList_->size();
293 }
294 
initLabels(UVector & indexCharacters,UErrorCode & errorCode) const295 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
296     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
297     if (U_FAILURE(errorCode)) { return; }
298 
299     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
300     const UnicodeString &overflowBoundary =
301         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
302 
303     // We make a sorted array of elements.
304     // Some of the input may be redundant.
305     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
306     // We filter out those cases.
307     UnicodeSetIterator iter(*initialLabels_);
308     while (iter.next()) {
309         const UnicodeString *item = &iter.getString();
310         LocalPointer<UnicodeString> ownedItem;
311         UBool checkDistinct;
312         int32_t itemLength = item->length();
313         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
314             checkDistinct = FALSE;
315         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
316                 item->charAt(itemLength - 2) != 0x2a) {
317             // Use a label if it is marked with one trailing star,
318             // even if the label string sorts the same when all contractions are suppressed.
319             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
320             item = ownedItem.getAlias();
321             if (item == NULL) {
322                 errorCode = U_MEMORY_ALLOCATION_ERROR;
323                 return;
324             }
325             checkDistinct = FALSE;
326         } else {
327             checkDistinct = TRUE;
328         }
329         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
330             // Ignore a primary-ignorable or non-alphabetic index character.
331         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
332             // Ignore an index character that will land in the overflow bucket.
333         } else if (checkDistinct &&
334                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
335             // Ignore a multi-code point index character that does not sort distinctly
336             // from the sequence of its separate characters.
337         } else {
338             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
339             if (insertionPoint < 0) {
340                 indexCharacters.insertElementAt(
341                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
342             } else {
343                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
344                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
345                     indexCharacters.setElementAt(
346                         ownedString(*item, ownedItem, errorCode), insertionPoint);
347                 }
348             }
349         }
350     }
351     if (U_FAILURE(errorCode)) { return; }
352 
353     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
354 
355     int32_t size = indexCharacters.size() - 1;
356     if (size > maxLabelCount_) {
357         int32_t count = 0;
358         int32_t old = -1;
359         for (int32_t i = 0; i < indexCharacters.size();) {
360             ++count;
361             int32_t bump = count * maxLabelCount_ / size;
362             if (bump == old) {
363                 indexCharacters.removeElementAt(i);
364             } else {
365                 old = bump;
366                 ++i;
367             }
368         }
369     }
370 }
371 
372 namespace {
373 
fixLabel(const UnicodeString & current,UnicodeString & temp)374 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
375     if (!current.startsWith(BASE, BASE_LENGTH)) {
376         return current;
377     }
378     UChar rest = current.charAt(BASE_LENGTH);
379     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
380         int32_t count = rest-0x2800;
381         temp.setTo((UChar)(0x30 + count % 10));
382         if (count >= 10) {
383             count /= 10;
384             temp.insert(0, (UChar)(0x30 + count % 10));
385             if (count >= 10) {
386                 count /= 10;
387                 temp.insert(0, (UChar)(0x30 + count));
388             }
389         }
390         return temp.append((UChar)0x5283);
391     }
392     return temp.setTo(current, BASE_LENGTH);
393 }
394 
hasMultiplePrimaryWeights(const RuleBasedCollator & coll,uint32_t variableTop,const UnicodeString & s,UVector64 & ces,UErrorCode & errorCode)395 UBool hasMultiplePrimaryWeights(
396         const RuleBasedCollator &coll, uint32_t variableTop,
397         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
398     ces.removeAllElements();
399     coll.internalGetCEs(s, ces, errorCode);
400     if (U_FAILURE(errorCode)) { return FALSE; }
401     UBool seenPrimary = FALSE;
402     for (int32_t i = 0; i < ces.size(); ++i) {
403         int64_t ce = ces.elementAti(i);
404         uint32_t p = (uint32_t)(ce >> 32);
405         if (p > variableTop) {
406             // not primary ignorable
407             if (seenPrimary) {
408                 return TRUE;
409             }
410             seenPrimary = TRUE;
411         }
412     }
413     return FALSE;
414 }
415 
416 }  // namespace
417 
createBucketList(UErrorCode & errorCode) const418 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
419     // Initialize indexCharacters.
420     UVector indexCharacters(errorCode);
421     indexCharacters.setDeleter(uprv_deleteUObject);
422     initLabels(indexCharacters, errorCode);
423     if (U_FAILURE(errorCode)) { return NULL; }
424 
425     // Variables for hasMultiplePrimaryWeights().
426     UVector64 ces(errorCode);
427     uint32_t variableTop;
428     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
429         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
430     } else {
431         variableTop = 0;
432     }
433     UBool hasInvisibleBuckets = FALSE;
434 
435     // Helper arrays for Chinese Pinyin collation.
436     Bucket *asciiBuckets[26] = {
437         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
438         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
439     };
440     Bucket *pinyinBuckets[26] = {
441         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
442         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
443     };
444     UBool hasPinyin = FALSE;
445 
446     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
447     if (U_FAILURE(errorCode)) {
448         return NULL;
449     }
450     bucketList->setDeleter(uprv_deleteUObject);
451 
452     // underflow bucket
453     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
454     if (bucket == NULL) {
455         errorCode = U_MEMORY_ALLOCATION_ERROR;
456         return NULL;
457     }
458     bucketList->addElement(bucket, errorCode);
459     if (U_FAILURE(errorCode)) { return NULL; }
460 
461     UnicodeString temp;
462 
463     // fix up the list, adding underflow, additions, overflow
464     // Insert inflow labels as needed.
465     int32_t scriptIndex = -1;
466     const UnicodeString *scriptUpperBoundary = &emptyString_;
467     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
468         UnicodeString &current = *getString(indexCharacters, i);
469         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
470             // We crossed the script boundary into a new script.
471             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
472             UBool skippedScript = FALSE;
473             for (;;) {
474                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
475                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
476                     break;
477                 }
478                 skippedScript = TRUE;
479             }
480             if (skippedScript && bucketList->size() > 1) {
481                 // We are skipping one or more scripts,
482                 // and we are not just getting out of the underflow label.
483                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
484                 if (bucket == NULL) {
485                     errorCode = U_MEMORY_ALLOCATION_ERROR;
486                     return NULL;
487                 }
488                 bucketList->addElement(bucket, errorCode);
489             }
490         }
491         // Add a bucket with the current label.
492         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
493         if (bucket == NULL) {
494             errorCode = U_MEMORY_ALLOCATION_ERROR;
495             return NULL;
496         }
497         bucketList->addElement(bucket, errorCode);
498         // Remember ASCII and Pinyin buckets for Pinyin redirects.
499         UChar c;
500         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
501             asciiBuckets[c - 0x41] = bucket;
502         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
503                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
504             pinyinBuckets[c - 0x41] = bucket;
505             hasPinyin = TRUE;
506         }
507         // Check for multiple primary weights.
508         if (!current.startsWith(BASE, BASE_LENGTH) &&
509                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
510                                           ces, errorCode) &&
511                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
512             // "AE-ligature" or "Sch" etc.
513             for (int32_t j = bucketList->size() - 2;; --j) {
514                 Bucket *singleBucket = getBucket(*bucketList, j);
515                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
516                     // There is no single-character bucket since the last
517                     // underflow or inflow label.
518                     break;
519                 }
520                 if (singleBucket->displayBucket_ == NULL &&
521                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
522                                                    singleBucket->lowerBoundary_,
523                                                    ces, errorCode)) {
524                     // Add an invisible bucket that redirects strings greater than the expansion
525                     // to the previous single-character bucket.
526                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
527                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
528                     bucket = new Bucket(emptyString_,
529                         UnicodeString(current).append((UChar)0xFFFF),
530                         U_ALPHAINDEX_NORMAL);
531                     if (bucket == NULL) {
532                         errorCode = U_MEMORY_ALLOCATION_ERROR;
533                         return NULL;
534                     }
535                     bucket->displayBucket_ = singleBucket;
536                     bucketList->addElement(bucket, errorCode);
537                     hasInvisibleBuckets = TRUE;
538                     break;
539                 }
540             }
541         }
542     }
543     if (U_FAILURE(errorCode)) { return NULL; }
544     if (bucketList->size() == 1) {
545         // No real labels, show only the underflow label.
546         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
547         if (bl == NULL) {
548             errorCode = U_MEMORY_ALLOCATION_ERROR;
549             return NULL;
550         }
551         bucketList.orphan();
552         return bl;
553     }
554     // overflow bucket
555     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
556     if (bucket == NULL) {
557         errorCode = U_MEMORY_ALLOCATION_ERROR;
558         return NULL;
559     }
560     bucketList->addElement(bucket, errorCode); // final
561 
562     if (hasPinyin) {
563         // Redirect Pinyin buckets.
564         Bucket *asciiBucket = NULL;
565         for (int32_t i = 0; i < 26; ++i) {
566             if (asciiBuckets[i] != NULL) {
567                 asciiBucket = asciiBuckets[i];
568             }
569             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
570                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
571                 hasInvisibleBuckets = TRUE;
572             }
573         }
574     }
575 
576     if (U_FAILURE(errorCode)) { return NULL; }
577     if (!hasInvisibleBuckets) {
578         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
579         if (bl == NULL) {
580             errorCode = U_MEMORY_ALLOCATION_ERROR;
581             return NULL;
582         }
583         bucketList.orphan();
584         return bl;
585     }
586     // Merge inflow buckets that are visually adjacent.
587     // Iterate backwards: Merge inflow into overflow rather than the other way around.
588     int32_t i = bucketList->size() - 1;
589     Bucket *nextBucket = getBucket(*bucketList, i);
590     while (--i > 0) {
591         bucket = getBucket(*bucketList, i);
592         if (bucket->displayBucket_ != NULL) {
593             continue;  // skip invisible buckets
594         }
595         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
596             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
597                 bucket->displayBucket_ = nextBucket;
598                 continue;
599             }
600         }
601         nextBucket = bucket;
602     }
603 
604     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
605     if (U_FAILURE(errorCode)) {
606         return NULL;
607     }
608     // Do not call publicBucketList->setDeleter():
609     // This vector shares its objects with the bucketList.
610     for (int32_t j = 0; j < bucketList->size(); ++j) {
611         bucket = getBucket(*bucketList, j);
612         if (bucket->displayBucket_ == NULL) {
613             publicBucketList->addElement(bucket, errorCode);
614         }
615     }
616     if (U_FAILURE(errorCode)) { return NULL; }
617     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
618     if (bl == NULL) {
619         errorCode = U_MEMORY_ALLOCATION_ERROR;
620         return NULL;
621     }
622     bucketList.orphan();
623     publicBucketList.orphan();
624     return bl;
625 }
626 
627 /**
628  * Creates an index, and buckets and sorts the list of records into the index.
629  */
initBuckets(UErrorCode & errorCode)630 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
631     if (U_FAILURE(errorCode) || buckets_ != NULL) {
632         return;
633     }
634     buckets_ = createBucketList(errorCode);
635     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
636         return;
637     }
638 
639     // Sort the records by name.
640     // Stable sort preserves input order of collation duplicates.
641     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
642 
643     // Now, we traverse all of the input, which is now sorted.
644     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
645     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
646     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
647     // we need to improve it for that case.
648 
649     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
650     int32_t bucketIndex = 1;
651     Bucket *nextBucket;
652     const UnicodeString *upperBoundary;
653     if (bucketIndex < buckets_->bucketList_->size()) {
654         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
655         upperBoundary = &nextBucket->lowerBoundary_;
656     } else {
657         nextBucket = NULL;
658         upperBoundary = NULL;
659     }
660     for (int32_t i = 0; i < inputList_->size(); ++i) {
661         Record *r = getRecord(*inputList_, i);
662         // if the current bucket isn't the right one, find the one that is
663         // We have a special flag for the last bucket so that we don't look any further
664         while (upperBoundary != NULL &&
665                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
666             currentBucket = nextBucket;
667             // now reset the boundary that we compare against
668             if (bucketIndex < buckets_->bucketList_->size()) {
669                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
670                 upperBoundary = &nextBucket->lowerBoundary_;
671             } else {
672                 upperBoundary = NULL;
673             }
674         }
675         // now put the record into the bucket.
676         Bucket *bucket = currentBucket;
677         if (bucket->displayBucket_ != NULL) {
678             bucket = bucket->displayBucket_;
679         }
680         if (bucket->records_ == NULL) {
681             bucket->records_ = new UVector(errorCode);
682             if (bucket->records_ == NULL) {
683                 errorCode = U_MEMORY_ALLOCATION_ERROR;
684                 return;
685             }
686         }
687         bucket->records_->addElement(r, errorCode);
688     }
689 }
690 
clearBuckets()691 void AlphabeticIndex::clearBuckets() {
692     if (buckets_ != NULL) {
693         delete buckets_;
694         buckets_ = NULL;
695         internalResetBucketIterator();
696     }
697 }
698 
internalResetBucketIterator()699 void AlphabeticIndex::internalResetBucketIterator() {
700     labelsIterIndex_ = -1;
701     currentBucket_ = NULL;
702 }
703 
704 
addIndexExemplars(const Locale & locale,UErrorCode & status)705 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
706     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
707     if (U_FAILURE(status)) {
708         return;
709     }
710 
711     UnicodeSet exemplars;
712     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
713     if (U_SUCCESS(status)) {
714         initialLabels_->addAll(exemplars);
715         return;
716     }
717     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
718 
719     // The locale data did not include explicit Index characters.
720     // Synthesize a set of them from the locale's standard exemplar characters.
721     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
722     if (U_FAILURE(status)) {
723         return;
724     }
725 
726     // question: should we add auxiliary exemplars?
727     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
728         exemplars.add(0x61, 0x7A);
729     }
730     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
731         // cut down to small list
732         exemplars.remove(0xAC00, 0xD7A3).
733             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
734             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
735             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
736             add(0xD30C).add(0xD558);
737     }
738     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
739         // cut down to small list
740         // make use of the fact that Ethiopic is allocated in 8's, where
741         // the base is 0 mod 8.
742         UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
743         ethiopic.retainAll(exemplars);
744         exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
745     }
746 
747     // Upper-case any that aren't already so.
748     //   (We only do this for synthesized index characters.)
749     UnicodeSetIterator it(exemplars);
750     UnicodeString upperC;
751     while (it.next()) {
752         const UnicodeString &exemplarC = it.getString();
753         upperC = exemplarC;
754         upperC.toUpper(locale);
755         initialLabels_->add(upperC);
756     }
757 }
758 
addChineseIndexCharacters(UErrorCode & errorCode)759 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
760     UnicodeSet contractions;
761     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
762     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
763     initialLabels_->addAll(contractions);
764     UnicodeSetIterator iter(contractions);
765     while (iter.next()) {
766         const UnicodeString &s = iter.getString();
767         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
768         UChar c = s.charAt(s.length() - 1);
769         if (0x41 <= c && c <= 0x5A) {  // A-Z
770             // There are Pinyin labels, add ASCII A-Z labels as well.
771             initialLabels_->add(0x41, 0x5A);  // A-Z
772             break;
773         }
774     }
775     return TRUE;
776 }
777 
778 
779 /*
780  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
781  */
782 static const UChar CGJ = 0x034F;
separated(const UnicodeString & item)783 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
784     UnicodeString result;
785     if (item.length() == 0) {
786         return result;
787     }
788     int32_t i = 0;
789     for (;;) {
790         UChar32  cp = item.char32At(i);
791         result.append(cp);
792         i = item.moveIndex32(i, 1);
793         if (i >= item.length()) {
794             break;
795         }
796         result.append(CGJ);
797     }
798     return result;
799 }
800 
801 
operator ==(const AlphabeticIndex &) const802 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
803     return FALSE;
804 }
805 
806 
operator !=(const AlphabeticIndex &) const807 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
808     return FALSE;
809 }
810 
811 
getCollator() const812 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
813     return *collator_;
814 }
815 
816 
getInflowLabel() const817 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
818     return inflowLabel_;
819 }
820 
getOverflowLabel() const821 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
822     return overflowLabel_;
823 }
824 
825 
getUnderflowLabel() const826 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
827     return underflowLabel_;
828 }
829 
830 
setInflowLabel(const UnicodeString & label,UErrorCode &)831 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
832     inflowLabel_ = label;
833     clearBuckets();
834     return *this;
835 }
836 
837 
setOverflowLabel(const UnicodeString & label,UErrorCode &)838 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
839     overflowLabel_ = label;
840     clearBuckets();
841     return *this;
842 }
843 
844 
setUnderflowLabel(const UnicodeString & label,UErrorCode &)845 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
846     underflowLabel_ = label;
847     clearBuckets();
848     return *this;
849 }
850 
851 
getMaxLabelCount() const852 int32_t AlphabeticIndex::getMaxLabelCount() const {
853     return maxLabelCount_;
854 }
855 
856 
setMaxLabelCount(int32_t maxLabelCount,UErrorCode & status)857 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
858     if (U_FAILURE(status)) {
859         return *this;
860     }
861     if (maxLabelCount <= 0) {
862         status = U_ILLEGAL_ARGUMENT_ERROR;
863         return *this;
864     }
865     maxLabelCount_ = maxLabelCount;
866     clearBuckets();
867     return *this;
868 }
869 
870 
871 //
872 //  init() - Common code for constructors.
873 //
874 
init(const Locale * locale,UErrorCode & status)875 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
876     if (U_FAILURE(status)) { return; }
877     if (locale == NULL && collator_ == NULL) {
878         status = U_ILLEGAL_ARGUMENT_ERROR;
879         return;
880     }
881 
882     initialLabels_         = new UnicodeSet();
883     if (initialLabels_ == NULL) {
884         status = U_MEMORY_ALLOCATION_ERROR;
885         return;
886     }
887 
888     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
889     overflowLabel_ = inflowLabel_;
890     underflowLabel_ = inflowLabel_;
891 
892     if (collator_ == NULL) {
893         Collator *coll = Collator::createInstance(*locale, status);
894         if (U_FAILURE(status)) {
895             delete coll;
896             return;
897         }
898         if (coll == NULL) {
899             status = U_MEMORY_ALLOCATION_ERROR;
900             return;
901         }
902         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
903         if (collator_ == NULL) {
904             delete coll;
905             status = U_UNSUPPORTED_ERROR;
906             return;
907         }
908     }
909     collatorPrimaryOnly_ = collator_->clone();
910     if (collatorPrimaryOnly_ == NULL) {
911         status = U_MEMORY_ALLOCATION_ERROR;
912         return;
913     }
914     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
915     firstCharsInScripts_ = firstStringsInScript(status);
916     if (U_FAILURE(status)) { return; }
917     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
918     // Guard against a degenerate collator where
919     // some script boundary strings are primary ignorable.
920     for (;;) {
921         if (U_FAILURE(status)) { return; }
922         if (firstCharsInScripts_->isEmpty()) {
923             // AlphabeticIndex requires some non-ignorable script boundary strings.
924             status = U_ILLEGAL_ARGUMENT_ERROR;
925             return;
926         }
927         if (collatorPrimaryOnly_->compare(
928                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
929                 emptyString_, status) == UCOL_EQUAL) {
930             firstCharsInScripts_->removeElementAt(0);
931         } else {
932             break;
933         }
934     }
935 
936     // Chinese index characters, which are specific to each of the several Chinese tailorings,
937     // take precedence over the single locale data exemplar set per language.
938     if (!addChineseIndexCharacters(status) && locale != NULL) {
939         addIndexExemplars(*locale, status);
940     }
941 }
942 
943 
944 //
945 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
946 //
947 static int32_t U_CALLCONV
collatorComparator(const void * context,const void * left,const void * right)948 collatorComparator(const void *context, const void *left, const void *right) {
949     const UElement *leftElement = static_cast<const UElement *>(left);
950     const UElement *rightElement = static_cast<const UElement *>(right);
951     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
952     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
953 
954     if (leftString == rightString) {
955         // Catches case where both are NULL
956         return 0;
957     }
958     if (leftString == NULL) {
959         return 1;
960     }
961     if (rightString == NULL) {
962         return -1;
963     }
964     const Collator *col = static_cast<const Collator *>(context);
965     UErrorCode errorCode = U_ZERO_ERROR;
966     return col->compare(*leftString, *rightString, errorCode);
967 }
968 
969 //
970 //  Comparison function for UVector<Record *> sorting with a collator.
971 //
972 static int32_t U_CALLCONV
recordCompareFn(const void * context,const void * left,const void * right)973 recordCompareFn(const void *context, const void *left, const void *right) {
974     const UElement *leftElement = static_cast<const UElement *>(left);
975     const UElement *rightElement = static_cast<const UElement *>(right);
976     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
977     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
978     const Collator *col = static_cast<const Collator *>(context);
979     UErrorCode errorCode = U_ZERO_ERROR;
980     return col->compare(leftRec->name_, rightRec->name_, errorCode);
981 }
982 
firstStringsInScript(UErrorCode & status)983 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
984     if (U_FAILURE(status)) {
985         return NULL;
986     }
987     LocalPointer<UVector> dest(new UVector(status), status);
988     if (U_FAILURE(status)) {
989         return NULL;
990     }
991     dest->setDeleter(uprv_deleteUObject);
992     // Fetch the script-first-primary contractions which are defined in the root collator.
993     // They all start with U+FDD1.
994     UnicodeSet set;
995     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
996     if (U_FAILURE(status)) {
997         return NULL;
998     }
999     if (set.isEmpty()) {
1000         status = U_UNSUPPORTED_ERROR;
1001         return NULL;
1002     }
1003     UnicodeSetIterator iter(set);
1004     while (iter.next()) {
1005         const UnicodeString &boundary = iter.getString();
1006         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1007         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1008             // Ignore boundaries for the special reordering groups.
1009             // Take only those for "real scripts" (where the sample character is a Letter,
1010             // and the one for unassigned implicit weights (Cn).
1011             continue;
1012         }
1013         UnicodeString *s = new UnicodeString(boundary);
1014         if (s == NULL) {
1015             status = U_MEMORY_ALLOCATION_ERROR;
1016             return NULL;
1017         }
1018         dest->addElement(s, status);
1019     }
1020     return dest.orphan();
1021 }
1022 
1023 
1024 namespace {
1025 
1026 /**
1027  * Returns true if one index character string is "better" than the other.
1028  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1029  * better, and otherwise binary-less-than is better.
1030  */
isOneLabelBetterThanOther(const Normalizer2 & nfkdNormalizer,const UnicodeString & one,const UnicodeString & other)1031 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1032                                 const UnicodeString &one, const UnicodeString &other) {
1033     // This is called with primary-equal strings, but never with one.equals(other).
1034     UErrorCode status = U_ZERO_ERROR;
1035     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1036     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1037     if (U_FAILURE(status)) { return FALSE; }
1038     int32_t result = n1.countChar32() - n2.countChar32();
1039     if (result != 0) {
1040         return result < 0;
1041     }
1042     result = n1.compareCodePointOrder(n2);
1043     if (result != 0) {
1044         return result < 0;
1045     }
1046     return one.compareCodePointOrder(other) < 0;
1047 }
1048 
1049 }  // namespace
1050 
1051 //
1052 //  Constructor & Destructor for AlphabeticIndex::Record
1053 //
1054 //     Records are internal only, instances are not directly surfaced in the public API.
1055 //     This class is mostly struct-like, with all public fields.
1056 
Record(const UnicodeString & name,const void * data)1057 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1058         : name_(name), data_(data) {}
1059 
~Record()1060 AlphabeticIndex::Record::~Record() {
1061 }
1062 
1063 
addRecord(const UnicodeString & name,const void * data,UErrorCode & status)1064 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1065     if (U_FAILURE(status)) {
1066         return *this;
1067     }
1068     if (inputList_ == NULL) {
1069         inputList_ = new UVector(status);
1070         if (inputList_ == NULL) {
1071             status = U_MEMORY_ALLOCATION_ERROR;
1072             return *this;
1073         }
1074         inputList_->setDeleter(alphaIndex_deleteRecord);
1075     }
1076     Record *r = new Record(name, data);
1077     if (r == NULL) {
1078         status = U_MEMORY_ALLOCATION_ERROR;
1079         return *this;
1080     }
1081     inputList_->addElement(r, status);
1082     clearBuckets();
1083     //std::string ss;
1084     //std::string ss2;
1085     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1086     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1087     return *this;
1088 }
1089 
1090 
clearRecords(UErrorCode & status)1091 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1092     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1093         inputList_->removeAllElements();
1094         clearBuckets();
1095     }
1096     return *this;
1097 }
1098 
getBucketIndex(const UnicodeString & name,UErrorCode & status)1099 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1100     initBuckets(status);
1101     if (U_FAILURE(status)) {
1102         return 0;
1103     }
1104     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1105 }
1106 
1107 
getBucketIndex() const1108 int32_t AlphabeticIndex::getBucketIndex() const {
1109     return labelsIterIndex_;
1110 }
1111 
1112 
nextBucket(UErrorCode & status)1113 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1114     if (U_FAILURE(status)) {
1115         return FALSE;
1116     }
1117     if (buckets_ == NULL && currentBucket_ != NULL) {
1118         status = U_ENUM_OUT_OF_SYNC_ERROR;
1119         return FALSE;
1120     }
1121     initBuckets(status);
1122     if (U_FAILURE(status)) {
1123         return FALSE;
1124     }
1125     ++labelsIterIndex_;
1126     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1127         labelsIterIndex_ = buckets_->getBucketCount();
1128         return FALSE;
1129     }
1130     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1131     resetRecordIterator();
1132     return TRUE;
1133 }
1134 
getBucketLabel() const1135 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1136     if (currentBucket_ != NULL) {
1137         return currentBucket_->label_;
1138     } else {
1139         return emptyString_;
1140     }
1141 }
1142 
1143 
getBucketLabelType() const1144 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1145     if (currentBucket_ != NULL) {
1146         return currentBucket_->labelType_;
1147     } else {
1148         return U_ALPHAINDEX_NORMAL;
1149     }
1150 }
1151 
1152 
getBucketRecordCount() const1153 int32_t AlphabeticIndex::getBucketRecordCount() const {
1154     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1155         return currentBucket_->records_->size();
1156     } else {
1157         return 0;
1158     }
1159 }
1160 
1161 
resetBucketIterator(UErrorCode & status)1162 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1163     if (U_FAILURE(status)) {
1164         return *this;
1165     }
1166     internalResetBucketIterator();
1167     return *this;
1168 }
1169 
1170 
nextRecord(UErrorCode & status)1171 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1172     if (U_FAILURE(status)) {
1173         return FALSE;
1174     }
1175     if (currentBucket_ == NULL) {
1176         // We are trying to iterate over the items in a bucket, but there is no
1177         // current bucket from the enumeration of buckets.
1178         status = U_INVALID_STATE_ERROR;
1179         return FALSE;
1180     }
1181     if (buckets_ == NULL) {
1182         status = U_ENUM_OUT_OF_SYNC_ERROR;
1183         return FALSE;
1184     }
1185     if (currentBucket_->records_ == NULL) {
1186         return FALSE;
1187     }
1188     ++itemsIterIndex_;
1189     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1190         itemsIterIndex_  = currentBucket_->records_->size();
1191         return FALSE;
1192     }
1193     return TRUE;
1194 }
1195 
1196 
getRecordName() const1197 const UnicodeString &AlphabeticIndex::getRecordName() const {
1198     const UnicodeString *retStr = &emptyString_;
1199     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1200         itemsIterIndex_ >= 0 &&
1201         itemsIterIndex_ < currentBucket_->records_->size()) {
1202             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1203             retStr = &item->name_;
1204     }
1205     return *retStr;
1206 }
1207 
getRecordData() const1208 const void *AlphabeticIndex::getRecordData() const {
1209     const void *retPtr = NULL;
1210     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1211         itemsIterIndex_ >= 0 &&
1212         itemsIterIndex_ < currentBucket_->records_->size()) {
1213             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1214             retPtr = item->data_;
1215     }
1216     return retPtr;
1217 }
1218 
1219 
resetRecordIterator()1220 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1221     itemsIterIndex_ = -1;
1222     return *this;
1223 }
1224 
1225 
1226 
Bucket(const UnicodeString & label,const UnicodeString & lowerBoundary,UAlphabeticIndexLabelType type)1227 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1228                                 const UnicodeString &lowerBoundary,
1229                                 UAlphabeticIndexLabelType type)
1230         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1231           displayBucket_(NULL), displayIndex_(-1),
1232           records_(NULL) {
1233 }
1234 
1235 
~Bucket()1236 AlphabeticIndex::Bucket::~Bucket() {
1237     delete records_;
1238 }
1239 
1240 U_NAMESPACE_END
1241 
1242 #endif  // !UCONFIG_NO_COLLATION
1243