1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2013-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * collationrootelements.cpp
9 *
10 * created on: 2013mar05
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_COLLATION
17 
18 #include "collation.h"
19 #include "collationrootelements.h"
20 #include "uassert.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 int64_t
lastCEWithPrimaryBefore(uint32_t p) const25 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
26     if(p == 0) { return 0; }
27     U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
28     int32_t index = findP(p);
29     uint32_t q = elements[index];
30     uint32_t secTer;
31     if(p == (q & 0xffffff00)) {
32         // p == elements[index] is a root primary. Find the CE before it.
33         // We must not be in a primary range.
34         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
35         secTer = elements[index - 1];
36         if((secTer & SEC_TER_DELTA_FLAG) == 0) {
37             // Primary CE just before p.
38             p = secTer & 0xffffff00;
39             secTer = Collation::COMMON_SEC_AND_TER_CE;
40         } else {
41             // secTer = last secondary & tertiary for the previous primary
42             index -= 2;
43             for(;;) {
44                 p = elements[index];
45                 if((p & SEC_TER_DELTA_FLAG) == 0) {
46                     p &= 0xffffff00;
47                     break;
48                 }
49                 --index;
50             }
51         }
52     } else {
53         // p > elements[index] which is the previous primary.
54         // Find the last secondary & tertiary weights for it.
55         p = q & 0xffffff00;
56         secTer = Collation::COMMON_SEC_AND_TER_CE;
57         for(;;) {
58             q = elements[++index];
59             if((q & SEC_TER_DELTA_FLAG) == 0) {
60                 // We must not be in a primary range.
61                 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
62                 break;
63             }
64             secTer = q;
65         }
66     }
67     return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
68 }
69 
70 int64_t
firstCEWithPrimaryAtLeast(uint32_t p) const71 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
72     if(p == 0) { return 0; }
73     int32_t index = findP(p);
74     if(p != (elements[index] & 0xffffff00)) {
75         for(;;) {
76             p = elements[++index];
77             if((p & SEC_TER_DELTA_FLAG) == 0) {
78                 // First primary after p. We must not be in a primary range.
79                 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
80                 break;
81             }
82         }
83     }
84     // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85     return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
86 }
87 
88 uint32_t
getPrimaryBefore(uint32_t p,UBool isCompressible) const89 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
90     int32_t index = findPrimary(p);
91     int32_t step;
92     uint32_t q = elements[index];
93     if(p == (q & 0xffffff00)) {
94         // Found p itself. Return the previous primary.
95         // See if p is at the end of a previous range.
96         step = (int32_t)q & PRIMARY_STEP_MASK;
97         if(step == 0) {
98             // p is not at the end of a range. Look for the previous primary.
99             do {
100                 p = elements[--index];
101             } while((p & SEC_TER_DELTA_FLAG) != 0);
102             return p & 0xffffff00;
103         }
104     } else {
105         // p is in a range, and not at the start.
106         uint32_t nextElement = elements[index + 1];
107         U_ASSERT(isEndOfPrimaryRange(nextElement));
108         step = (int32_t)nextElement & PRIMARY_STEP_MASK;
109     }
110     // Return the previous range primary.
111     if((p & 0xffff) == 0) {
112         return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
113     } else {
114         return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
115     }
116 }
117 
118 uint32_t
getSecondaryBefore(uint32_t p,uint32_t s) const119 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
120     int32_t index;
121     uint32_t previousSec, sec;
122     if(p == 0) {
123         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
124         // Gap at the beginning of the secondary CE range.
125         previousSec = 0;
126         sec = elements[index] >> 16;
127     } else {
128         index = findPrimary(p) + 1;
129         previousSec = Collation::BEFORE_WEIGHT16;
130         sec = getFirstSecTerForPrimary(index) >> 16;
131     }
132     U_ASSERT(s >= sec);
133     while(s > sec) {
134         previousSec = sec;
135         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
136         sec = elements[index++] >> 16;
137     }
138     U_ASSERT(sec == s);
139     return previousSec;
140 }
141 
142 uint32_t
getTertiaryBefore(uint32_t p,uint32_t s,uint32_t t) const143 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
144     U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
145     int32_t index;
146     uint32_t previousTer, secTer;
147     if(p == 0) {
148         if(s == 0) {
149             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
150             // Gap at the beginning of the tertiary CE range.
151             previousTer = 0;
152         } else {
153             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
154             previousTer = Collation::BEFORE_WEIGHT16;
155         }
156         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
157     } else {
158         index = findPrimary(p) + 1;
159         previousTer = Collation::BEFORE_WEIGHT16;
160         secTer = getFirstSecTerForPrimary(index);
161     }
162     uint32_t st = (s << 16) | t;
163     while(st > secTer) {
164         if((secTer >> 16) == s) { previousTer = secTer; }
165         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
166         secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
167     }
168     U_ASSERT(secTer == st);
169     return previousTer & 0xffff;
170 }
171 
172 uint32_t
getPrimaryAfter(uint32_t p,int32_t index,UBool isCompressible) const173 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
174     U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
175     uint32_t q = elements[++index];
176     int32_t step;
177     if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
178         // Return the next primary in this range.
179         if((p & 0xffff) == 0) {
180             return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
181         } else {
182             return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
183         }
184     } else {
185         // Return the next primary in the list.
186         while((q & SEC_TER_DELTA_FLAG) != 0) {
187             q = elements[++index];
188         }
189         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
190         return q;
191     }
192 }
193 
194 uint32_t
getSecondaryAfter(int32_t index,uint32_t s) const195 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
196     uint32_t secTer;
197     uint32_t secLimit;
198     if(index == 0) {
199         // primary = 0
200         U_ASSERT(s != 0);
201         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
202         secTer = elements[index];
203         // Gap at the end of the secondary CE range.
204         secLimit = 0x10000;
205     } else {
206         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
207         secTer = getFirstSecTerForPrimary(index + 1);
208         // If this is an explicit sec/ter unit, then it will be read once more.
209         // Gap for secondaries of primary CEs.
210         secLimit = getSecondaryBoundary();
211     }
212     for(;;) {
213         uint32_t sec = secTer >> 16;
214         if(sec > s) { return sec; }
215         secTer = elements[++index];
216         if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
217     }
218 }
219 
220 uint32_t
getTertiaryAfter(int32_t index,uint32_t s,uint32_t t) const221 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
222     uint32_t secTer;
223     uint32_t terLimit;
224     if(index == 0) {
225         // primary = 0
226         if(s == 0) {
227             U_ASSERT(t != 0);
228             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
229             // Gap at the end of the tertiary CE range.
230             terLimit = 0x4000;
231         } else {
232             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
233             // Gap for tertiaries of primary/secondary CEs.
234             terLimit = getTertiaryBoundary();
235         }
236         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
237     } else {
238         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
239         secTer = getFirstSecTerForPrimary(index + 1);
240         // If this is an explicit sec/ter unit, then it will be read once more.
241         terLimit = getTertiaryBoundary();
242     }
243     uint32_t st = (s << 16) | t;
244     for(;;) {
245         if(secTer > st) {
246             U_ASSERT((secTer >> 16) == s);
247             return secTer & 0xffff;
248         }
249         secTer = elements[++index];
250         // No tertiary greater than t for this primary+secondary.
251         if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
252         secTer &= ~SEC_TER_DELTA_FLAG;
253     }
254 }
255 
256 uint32_t
getFirstSecTerForPrimary(int32_t index) const257 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
258     uint32_t secTer = elements[index];
259     if((secTer & SEC_TER_DELTA_FLAG) == 0) {
260         // No sec/ter delta.
261         return Collation::COMMON_SEC_AND_TER_CE;
262     }
263     secTer &= ~SEC_TER_DELTA_FLAG;
264     if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
265         // Implied sec/ter.
266         return Collation::COMMON_SEC_AND_TER_CE;
267     }
268     // Explicit sec/ter below common/common.
269     return secTer;
270 }
271 
272 int32_t
findPrimary(uint32_t p) const273 CollationRootElements::findPrimary(uint32_t p) const {
274     // Requirement: p must occur as a root primary.
275     U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
276     int32_t index = findP(p);
277     // If p is in a range, then we just assume that p is an actual primary in this range.
278     // (Too cumbersome/expensive to check.)
279     // Otherwise, it must be an exact match.
280     U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
281     return index;
282 }
283 
284 int32_t
findP(uint32_t p) const285 CollationRootElements::findP(uint32_t p) const {
286     // p need not occur as a root primary.
287     // For example, it might be a reordering group boundary.
288     U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
289     // modified binary search
290     int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
291     U_ASSERT(p >= elements[start]);
292     int32_t limit = length - 1;
293     U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
294     U_ASSERT(p < elements[limit]);
295     while((start + 1) < limit) {
296         // Invariant: elements[start] and elements[limit] are primaries,
297         // and elements[start]<=p<=elements[limit].
298         int32_t i = (start + limit) / 2;
299         uint32_t q = elements[i];
300         if((q & SEC_TER_DELTA_FLAG) != 0) {
301             // Find the next primary.
302             int32_t j = i + 1;
303             for(;;) {
304                 if(j == limit) { break; }
305                 q = elements[j];
306                 if((q & SEC_TER_DELTA_FLAG) == 0) {
307                     i = j;
308                     break;
309                 }
310                 ++j;
311             }
312             if((q & SEC_TER_DELTA_FLAG) != 0) {
313                 // Find the preceding primary.
314                 j = i - 1;
315                 for(;;) {
316                     if(j == start) { break; }
317                     q = elements[j];
318                     if((q & SEC_TER_DELTA_FLAG) == 0) {
319                         i = j;
320                         break;
321                     }
322                     --j;
323                 }
324                 if((q & SEC_TER_DELTA_FLAG) != 0) {
325                     // No primary between start and limit.
326                     break;
327                 }
328             }
329         }
330         if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
331             limit = i;
332         } else {
333             start = i;
334         }
335     }
336     return start;
337 }
338 
339 U_NAMESPACE_END
340 
341 #endif  // !UCONFIG_NO_COLLATION
342