1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  denseranges.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010sep25
14 *   created by: Markus W. Scherer
15 *
16 * Helper code for finding a small number of dense ranges.
17 */
18 
19 #include "unicode/utypes.h"
20 #include "denseranges.h"
21 
22 // Definitions in the anonymous namespace are invisible outside this file.
23 namespace {
24 
25 /**
26  * Collect up to 15 range gaps and sort them by ascending gap size.
27  */
28 class LargestGaps {
29 public:
LargestGaps(int32_t max)30     LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
31 
add(int32_t gapStart,int64_t gapLength)32     void add(int32_t gapStart, int64_t gapLength) {
33         int32_t i=length;
34         while(i>0 && gapLength>gapLengths[i-1]) {
35             --i;
36         }
37         if(i<maxLength) {
38             // The new gap is now one of the maxLength largest.
39             // Insert the new gap, moving up smaller ones of the previous
40             // length largest.
41             int32_t j= length<maxLength ? length++ : maxLength-1;
42             while(j>i) {
43                 gapStarts[j]=gapStarts[j-1];
44                 gapLengths[j]=gapLengths[j-1];
45                 --j;
46             }
47             gapStarts[i]=gapStart;
48             gapLengths[i]=gapLength;
49         }
50     }
51 
truncate(int32_t newLength)52     void truncate(int32_t newLength) {
53         if(newLength<length) {
54             length=newLength;
55         }
56     }
57 
count() const58     int32_t count() const { return length; }
gapStart(int32_t i) const59     int32_t gapStart(int32_t i) const { return gapStarts[i]; }
gapLength(int32_t i) const60     int64_t gapLength(int32_t i) const { return gapLengths[i]; }
61 
firstAfter(int32_t value) const62     int32_t firstAfter(int32_t value) const {
63         if(length==0) {
64             return -1;
65         }
66         int32_t minValue=0;
67         int32_t minIndex=-1;
68         for(int32_t i=0; i<length; ++i) {
69             if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
70                 minValue=gapStarts[i];
71                 minIndex=i;
72             }
73         }
74         return minIndex;
75     }
76 
77 private:
78     static const int32_t kCapacity=15;
79 
80     int32_t maxLength;
81     int32_t length;
82     int32_t gapStarts[kCapacity];
83     int64_t gapLengths[kCapacity];
84 };
85 
86 }  // namespace
87 
88 /**
89  * Does it make sense to write 1..capacity ranges?
90  * Returns 0 if not, otherwise the number of ranges.
91  * @param values Sorted array of signed-integer values.
92  * @param length Number of values.
93  * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
94  *                Should be 0x80..0x100, must be 1..0x100.
95  * @param ranges Output ranges array.
96  * @param capacity Maximum number of ranges.
97  * @return Minimum number of ranges (at most capacity) that have the desired density,
98  *         or 0 if that density cannot be achieved.
99  */
100 U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[],int32_t length,int32_t density,int32_t ranges[][2],int32_t capacity)101 uprv_makeDenseRanges(const int32_t values[], int32_t length,
102                      int32_t density,
103                      int32_t ranges[][2], int32_t capacity) {
104     if(length<=2) {
105         return 0;
106     }
107     int32_t minValue=values[0];
108     int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
109     // Use int64_t variables for intermediate-value precision and to avoid
110     // signed-int32_t overflow of maxValue-minValue.
111     int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
112     if(length>=(density*maxLength)/0x100) {
113         // Use one range.
114         ranges[0][0]=minValue;
115         ranges[0][1]=maxValue;
116         return 1;
117     }
118     if(length<=4) {
119         return 0;
120     }
121     // See if we can split [minValue, maxValue] into 2..capacity ranges,
122     // divided by the 1..(capacity-1) largest gaps.
123     LargestGaps gaps(capacity-1);
124     int32_t i;
125     int32_t expectedValue=minValue;
126     for(i=1; i<length; ++i) {
127         ++expectedValue;
128         int32_t actualValue=values[i];
129         if(expectedValue!=actualValue) {
130             gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
131             expectedValue=actualValue;
132         }
133     }
134     // We know gaps.count()>=1 because we have fewer values (length) than
135     // the length of the [minValue..maxValue] range (maxLength).
136     // (Otherwise we would have returned with the one range above.)
137     int32_t num;
138     for(i=0, num=2;; ++i, ++num) {
139         if(i>=gaps.count()) {
140             // The values are too sparse for capacity or fewer ranges
141             // of the requested density.
142             return 0;
143         }
144         maxLength-=gaps.gapLength(i);
145         if(length>num*2 && length>=(density*maxLength)/0x100) {
146             break;
147         }
148     }
149     // Use the num ranges with the num-1 largest gaps.
150     gaps.truncate(num-1);
151     ranges[0][0]=minValue;
152     for(i=0; i<=num-2; ++i) {
153         int32_t gapIndex=gaps.firstAfter(minValue);
154         int32_t gapStart=gaps.gapStart(gapIndex);
155         ranges[i][1]=gapStart-1;
156         ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
157     }
158     ranges[num-1][1]=maxValue;
159     return num;
160 }
161