1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
2    Author: Stefan Larsson
3 
4    This file is part of GNU Libidn.
5 
6    GNU Libidn is free software: you can redistribute it and/or
7    modify it under the terms of either:
8 
9      * the GNU Lesser General Public License as published by the Free
10        Software Foundation; either version 3 of the License, or (at
11        your option) any later version.
12 
13    or
14 
15      * the GNU General Public License as published by the Free
16        Software Foundation; either version 2 of the License, or (at
17        your option) any later version.
18 
19    or both in parallel, as here.
20 
21    GNU Libidn is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received copies of the GNU General Public License and
27    the GNU Lesser General Public License along with this program.  If
28    not, see <http://www.gnu.org/licenses/>. */
29 
30 package gnu.inet.encoding;
31 
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.Collection;
35 import java.util.Collections;
36 import java.util.Comparator;
37 import java.util.Iterator;
38 import java.util.List;
39 import java.util.Locale;
40 
41 /**
42  * Set of integer ranges supporting efficient contains-checks.
43  * @author Stefan Larsson
44  */
45 public final class RangeSet
46 {
47   private static final RangeContainsComparator CONTAINS_COMPARATOR =
48       new RangeContainsComparator();
49 
50   private final Range[] ranges;
51 
52   private final Range mostSignificantGap;
53 
54   // TODO Store ranges with improved cache-locality, probably int[] with even/odd elements being first/last
55 
56   public static final class Range implements Comparable<Range>
57   {
58     private final int first;
59     private final int last;
60 
Range(int first, int last)61     public Range(int first, int last)
62     {
63       if (first > last) {
64 	throw new IllegalArgumentException("Reversed " + first + "-" + last);
65       }
66       this.first = first;
67       this.last = last;
68     }
69 
Range(int single)70     public Range(int single)
71     {
72       this.first = single;
73       this.last = single;
74     }
75 
Range(Range firstRange, Range lastRange)76     public Range(Range firstRange, Range lastRange)
77     {
78       // firstRange shouldn't start later than lastRange starts
79       if (firstRange.first > lastRange.first)
80       {
81 	throw new IllegalArgumentException(firstRange + " starts later than "
82 					   + lastRange);
83       }
84 
85       if (lastRange.first - firstRange.last > 1)
86       {
87 	throw new IllegalArgumentException("Disjunct " + firstRange
88 					   + " - " + lastRange);
89       }
90       this.first = firstRange.first;
91       this.last = lastRange.last;
92     }
93 
contains(final int i)94     public boolean contains(final int i) {
95       return first <= i && i <= last;
96     }
97 
98     /**
99      * Checks if this range completely can contain the other range.
100      * @param other other range to verify
101      * @return {@code true} if other completely contained by this,
102      *         otherwise {@code false}
103      */
contains(final Range other)104     public boolean contains(final Range other)
105     {
106       return (this.first <= other.first) && (other.last <= this.last);
107     }
108 
109     @Override
toString()110     public String toString()
111     {
112       return "[" + Integer.toHexString(first).toUpperCase(Locale.ENGLISH) + ","
113 	  + Integer.toHexString(last).toUpperCase(Locale.ENGLISH) + ']';
114     }
115 
116     //@Override
compareTo(final Range other)117     public int compareTo(final Range other)
118     {
119       if (this.first < other.first)
120       {
121 	return -1;
122       }
123       if (this.first > other.first)
124       {
125 	return 1;
126       }
127 
128       if (this.last < other.last)
129       {
130 	return -1;
131       }
132       if (this.last > other.last)
133       {
134 	return 1;
135       }
136 
137       return 0;
138     }
139 
140     @Override
equals(Object o)141     public boolean equals(Object o)
142     {
143       if (this == o) return true;
144       if (o == null || getClass() != o.getClass()) return false;
145 
146       Range range = (Range) o;
147 
148       if (first != range.first) return false;
149       if (last != range.last) return false;
150 
151       return true;
152     }
153 
154     @Override
hashCode()155     public int hashCode()
156     {
157       return 31 * first + last;
158     }
159   }
160 
161   private static class RangeContainsComparator implements Comparator<Range> {
162 
compare(Range current, Range contained)163     public int compare(Range current, Range contained)
164     {
165       if (current.last < contained.first)
166       {
167 	return -1;
168       }
169       if (contained.last < current.first)
170       {
171 	return 1;
172       }
173       return 0;
174     }
175   }
176 
RangeSet(final List<Range> ranges)177   private RangeSet(final List<Range> ranges) {
178     this.ranges = ranges.toArray(new Range[ranges.size()]);
179     this.mostSignificantGap = findMostSignificantGap(this.ranges);
180   }
181 
182   /**
183    * Returns the most significant gap, or {@code null} if no important gap found.
184    * @param ranges ranges to search
185    * @return most significant gap, or {@code null} if no important gap found
186    */
findMostSignificantGap(final Range[] ranges)187   private static Range findMostSignificantGap(final Range[] ranges)
188   {
189     if (ranges.length == 0) {
190       return new Range(0, Integer.MAX_VALUE);
191     }
192 
193     final int aIdx =
194 	    Arrays.binarySearch(ranges, new Range('a'), CONTAINS_COMPARATOR);
195     if (aIdx >= 0)
196     {
197       // 'a' in ranges, don't even attempt to exclude smartly
198       return null;
199     }
200 
201     final int insertionPoint = -(aIdx + 1);
202     if (insertionPoint == 0) {
203       return new Range(0, ranges[0].first - 1);
204     }
205     if (insertionPoint == ranges.length) {
206       return new Range(ranges[ranges.length - 1].last + 1, Integer.MAX_VALUE);
207     }
208     return new Range(ranges[insertionPoint - 1].last + 1,
209 		     ranges[insertionPoint].first - 1);
210   }
211 
212   public static final class Builder {
213     private final List<Range> ranges = new ArrayList<Range>();
214 
addRange(final Range range)215     public Builder addRange(final Range range) {
216       ranges.add(range);
217       return this;
218     }
219 
addRanges(final Collection<Range> ranges)220     public Builder addRanges(final Collection<Range> ranges) {
221       ranges.addAll(ranges);
222       return this;
223     }
224 
addRanges(final char[][] ranges)225     public Builder addRanges(final char[][] ranges) {
226       for (final char[] range : ranges) {
227 	if (range.length == 1) {
228 	  this.ranges.add(new Range(range[0]));
229 	} else if (range.length == 2) {
230 	  this.ranges.add(new Range(range[0], range[1]));
231 	} else {
232 	  throw new IllegalArgumentException("Unexpected range len:"
233 					     + range.length);
234 	}
235       }
236       return this;
237     }
238 
addRanges(final char[] items)239     public Builder addRanges(final char[] items) {
240       for (final char item : items) {
241 	this.ranges.add(new Range(item));
242       }
243       return this;
244     }
245 
build()246     public RangeSet build() {
247       Collections.sort(ranges);
248       final List<Range> mergedRanges = mergeRanges(ranges);
249       return new RangeSet(mergedRanges);
250     }
251 
mergeRanges(final List<Range> ranges)252     static List<Range> mergeRanges(final List<Range> ranges)
253     {
254       if (ranges.isEmpty())
255       {
256 	return Collections.emptyList();
257       }
258 
259       final List<Range> result = new ArrayList<Range>();
260       final Iterator<Range> it = ranges.iterator();
261 
262       Range leftRange = it.next();
263       List<Range> merged = Collections.singletonList(leftRange);
264       while (it.hasNext())
265       {
266 	// merge ranges as long as they're adjacent/overlapping
267 	while (merged.size() == 1 && it.hasNext())
268 	{
269 	  leftRange = merged.get(0);
270 	  Range rightRange = it.next();
271 	  merged = mergeRanges(leftRange, rightRange);
272 	}
273 	// when ranges weren't merge-able, add all but last, merge against last
274 	if (merged.size() > 1)
275 	{
276 	  result.addAll(merged.subList(0, merged.size() - 1));
277 	  merged = Collections.singletonList(merged.get(merged.size() - 1));
278 	}
279       }
280       result.addAll(merged);
281       return result;
282     }
283 
mergeRanges(Range leftRange, Range rightRange)284     static List<Range> mergeRanges(Range leftRange, Range rightRange)
285     {
286       if (leftRange.last + 1 >= rightRange.first) {
287 	final int last = Math.max(rightRange.last, leftRange.last);
288 	return Collections.singletonList(new Range(leftRange.first, last));
289       } else {
290 	final List<Range> result = new ArrayList<Range>(2);
291 	result.add(leftRange);
292 	result.add(rightRange);
293 	return result;
294       }
295     }
296   }
297 
builder()298   public static Builder builder() {
299     return new Builder();
300   }
301 
contains(final int i)302   public boolean contains(final int i)
303   {
304     if (mostSignificantGap != null && mostSignificantGap.contains(i)) {
305       return false;
306     }
307 
308     final Range searchRange = new Range(i);
309     int idx = Arrays.binarySearch(ranges, searchRange, CONTAINS_COMPARATOR);
310     return idx >= 0;
311   }
312 
containsAnyCodePoint(final CharSequence text)313   public boolean containsAnyCodePoint(final CharSequence text) {
314     final Range inputRange = createTextRange(text);
315     return containsAnyCodePoint(text, inputRange);
316   }
317 
containsAnyCodePoint(final CharSequence text, final Range inputRange)318   public boolean containsAnyCodePoint(final CharSequence text,
319 				      final Range inputRange) {
320     final int len = text.length();
321     if (len == 0)
322     {
323       return false;
324     }
325 
326     if (mostSignificantGap != null
327 	&& mostSignificantGap.contains(inputRange.first)
328 	&& mostSignificantGap.contains(inputRange.last)) {
329       return false;
330     }
331 
332     // if found, returns the index, otherwise "-insertionPoint - 1"
333     final int idxEnd =
334 	    Arrays.binarySearch(ranges, new Range(inputRange.last), CONTAINS_COMPARATOR);
335     // search for start in "head" range only (likely small)
336     final int startFromIdx = 0;
337     final int startEndIdx = idxEnd >= 0 ? idxEnd + 1 : -(idxEnd + 1);
338     final int idxStart =
339 	    Arrays.binarySearch(ranges, startFromIdx, startEndIdx,
340 				new Range(inputRange.first), CONTAINS_COMPARATOR);
341 
342     // If whole range in text outside same non-contained range, won't be found
343     // If whole range in text inside single contained range, must match
344     if (idxStart == idxEnd)
345     {
346       return idxStart >= 0;
347     }
348 
349     // if start or end inside contained range, match
350     if (idxStart >= 0 || idxEnd >= 0)
351     {
352       return true;
353     }
354 
355     // text spans across multiple ranges of set, need to search individual chars
356     final int searchStart = -idxStart + 1;
357     final int searchEnd = -idxEnd + 1;
358 
359     for (int i = 0; i < len; )
360     {
361       final int cp = Character.codePointAt(text, i);
362       i += Character.charCount(cp);
363       final int idx = Arrays.binarySearch(ranges, searchStart, searchEnd, new Range(cp), CONTAINS_COMPARATOR);
364       if (idx > 0)
365       {
366 	return true;
367       }
368     }
369     return false;
370   }
371 
372   /**
373    * Returns the range of the input or {@code all-inclusive range} if input is empty
374    * @param text input text
375    * @return range of input, or {@code all-inclusive} if empty input
376    */
createTextRange(final CharSequence text)377   public static Range createTextRange(final CharSequence text)
378   {
379     final int len = text.length();
380     if (len == 0) {
381       return new Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
382     }
383 
384     int minCodePoint = Integer.MAX_VALUE;
385     int maxCodePoint = Integer.MIN_VALUE;
386     for (int i = 0; i < len; )
387     {
388       final int cp = Character.codePointAt(text, i);
389       minCodePoint = Math.min(minCodePoint, cp);
390       maxCodePoint = Math.max(maxCodePoint, cp);
391       i += Character.charCount(cp);
392     }
393     return new Range(minCodePoint, maxCodePoint);
394   }
395 
396   @Override
toString()397   public String toString()
398   {
399     return "RangeSet{" +
400 	    "ranges=" + Arrays.asList(ranges) +
401 	    ", mostSignificantGap=" + mostSignificantGap +
402 	    '}';
403   }
404 }
405