1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc. 2 Author: Stefan Larsson 3 4 This file is part of GNU Libidn. 5 6 GNU Libidn is free software: you can redistribute it and/or 7 modify it under the terms of either: 8 9 * the GNU Lesser General Public License as published by the Free 10 Software Foundation; either version 3 of the License, or (at 11 your option) any later version. 12 13 or 14 15 * the GNU General Public License as published by the Free 16 Software Foundation; either version 2 of the License, or (at 17 your option) any later version. 18 19 or both in parallel, as here. 20 21 GNU Libidn is distributed in the hope that it will be useful, 22 but WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 General Public License for more details. 25 26 You should have received copies of the GNU General Public License and 27 the GNU Lesser General Public License along with this program. If 28 not, see <http://www.gnu.org/licenses/>. */ 29 30 package gnu.inet.encoding; 31 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Collection; 35 import java.util.Collections; 36 import java.util.Comparator; 37 import java.util.Iterator; 38 import java.util.List; 39 import java.util.Locale; 40 41 /** 42 * Set of integer ranges supporting efficient contains-checks. 43 * @author Stefan Larsson 44 */ 45 public final class RangeSet 46 { 47 private static final RangeContainsComparator CONTAINS_COMPARATOR = 48 new RangeContainsComparator(); 49 50 private final Range[] ranges; 51 52 private final Range mostSignificantGap; 53 54 // TODO Store ranges with improved cache-locality, probably int[] with even/odd elements being first/last 55 56 public static final class Range implements Comparable<Range> 57 { 58 private final int first; 59 private final int last; 60 Range(int first, int last)61 public Range(int first, int last) 62 { 63 if (first > last) { 64 throw new IllegalArgumentException("Reversed " + first + "-" + last); 65 } 66 this.first = first; 67 this.last = last; 68 } 69 Range(int single)70 public Range(int single) 71 { 72 this.first = single; 73 this.last = single; 74 } 75 Range(Range firstRange, Range lastRange)76 public Range(Range firstRange, Range lastRange) 77 { 78 // firstRange shouldn't start later than lastRange starts 79 if (firstRange.first > lastRange.first) 80 { 81 throw new IllegalArgumentException(firstRange + " starts later than " 82 + lastRange); 83 } 84 85 if (lastRange.first - firstRange.last > 1) 86 { 87 throw new IllegalArgumentException("Disjunct " + firstRange 88 + " - " + lastRange); 89 } 90 this.first = firstRange.first; 91 this.last = lastRange.last; 92 } 93 contains(final int i)94 public boolean contains(final int i) { 95 return first <= i && i <= last; 96 } 97 98 /** 99 * Checks if this range completely can contain the other range. 100 * @param other other range to verify 101 * @return {@code true} if other completely contained by this, 102 * otherwise {@code false} 103 */ contains(final Range other)104 public boolean contains(final Range other) 105 { 106 return (this.first <= other.first) && (other.last <= this.last); 107 } 108 109 @Override toString()110 public String toString() 111 { 112 return "[" + Integer.toHexString(first).toUpperCase(Locale.ENGLISH) + "," 113 + Integer.toHexString(last).toUpperCase(Locale.ENGLISH) + ']'; 114 } 115 116 //@Override compareTo(final Range other)117 public int compareTo(final Range other) 118 { 119 if (this.first < other.first) 120 { 121 return -1; 122 } 123 if (this.first > other.first) 124 { 125 return 1; 126 } 127 128 if (this.last < other.last) 129 { 130 return -1; 131 } 132 if (this.last > other.last) 133 { 134 return 1; 135 } 136 137 return 0; 138 } 139 140 @Override equals(Object o)141 public boolean equals(Object o) 142 { 143 if (this == o) return true; 144 if (o == null || getClass() != o.getClass()) return false; 145 146 Range range = (Range) o; 147 148 if (first != range.first) return false; 149 if (last != range.last) return false; 150 151 return true; 152 } 153 154 @Override hashCode()155 public int hashCode() 156 { 157 return 31 * first + last; 158 } 159 } 160 161 private static class RangeContainsComparator implements Comparator<Range> { 162 compare(Range current, Range contained)163 public int compare(Range current, Range contained) 164 { 165 if (current.last < contained.first) 166 { 167 return -1; 168 } 169 if (contained.last < current.first) 170 { 171 return 1; 172 } 173 return 0; 174 } 175 } 176 RangeSet(final List<Range> ranges)177 private RangeSet(final List<Range> ranges) { 178 this.ranges = ranges.toArray(new Range[ranges.size()]); 179 this.mostSignificantGap = findMostSignificantGap(this.ranges); 180 } 181 182 /** 183 * Returns the most significant gap, or {@code null} if no important gap found. 184 * @param ranges ranges to search 185 * @return most significant gap, or {@code null} if no important gap found 186 */ findMostSignificantGap(final Range[] ranges)187 private static Range findMostSignificantGap(final Range[] ranges) 188 { 189 if (ranges.length == 0) { 190 return new Range(0, Integer.MAX_VALUE); 191 } 192 193 final int aIdx = 194 Arrays.binarySearch(ranges, new Range('a'), CONTAINS_COMPARATOR); 195 if (aIdx >= 0) 196 { 197 // 'a' in ranges, don't even attempt to exclude smartly 198 return null; 199 } 200 201 final int insertionPoint = -(aIdx + 1); 202 if (insertionPoint == 0) { 203 return new Range(0, ranges[0].first - 1); 204 } 205 if (insertionPoint == ranges.length) { 206 return new Range(ranges[ranges.length - 1].last + 1, Integer.MAX_VALUE); 207 } 208 return new Range(ranges[insertionPoint - 1].last + 1, 209 ranges[insertionPoint].first - 1); 210 } 211 212 public static final class Builder { 213 private final List<Range> ranges = new ArrayList<Range>(); 214 addRange(final Range range)215 public Builder addRange(final Range range) { 216 ranges.add(range); 217 return this; 218 } 219 addRanges(final Collection<Range> ranges)220 public Builder addRanges(final Collection<Range> ranges) { 221 ranges.addAll(ranges); 222 return this; 223 } 224 addRanges(final char[][] ranges)225 public Builder addRanges(final char[][] ranges) { 226 for (final char[] range : ranges) { 227 if (range.length == 1) { 228 this.ranges.add(new Range(range[0])); 229 } else if (range.length == 2) { 230 this.ranges.add(new Range(range[0], range[1])); 231 } else { 232 throw new IllegalArgumentException("Unexpected range len:" 233 + range.length); 234 } 235 } 236 return this; 237 } 238 addRanges(final char[] items)239 public Builder addRanges(final char[] items) { 240 for (final char item : items) { 241 this.ranges.add(new Range(item)); 242 } 243 return this; 244 } 245 build()246 public RangeSet build() { 247 Collections.sort(ranges); 248 final List<Range> mergedRanges = mergeRanges(ranges); 249 return new RangeSet(mergedRanges); 250 } 251 mergeRanges(final List<Range> ranges)252 static List<Range> mergeRanges(final List<Range> ranges) 253 { 254 if (ranges.isEmpty()) 255 { 256 return Collections.emptyList(); 257 } 258 259 final List<Range> result = new ArrayList<Range>(); 260 final Iterator<Range> it = ranges.iterator(); 261 262 Range leftRange = it.next(); 263 List<Range> merged = Collections.singletonList(leftRange); 264 while (it.hasNext()) 265 { 266 // merge ranges as long as they're adjacent/overlapping 267 while (merged.size() == 1 && it.hasNext()) 268 { 269 leftRange = merged.get(0); 270 Range rightRange = it.next(); 271 merged = mergeRanges(leftRange, rightRange); 272 } 273 // when ranges weren't merge-able, add all but last, merge against last 274 if (merged.size() > 1) 275 { 276 result.addAll(merged.subList(0, merged.size() - 1)); 277 merged = Collections.singletonList(merged.get(merged.size() - 1)); 278 } 279 } 280 result.addAll(merged); 281 return result; 282 } 283 mergeRanges(Range leftRange, Range rightRange)284 static List<Range> mergeRanges(Range leftRange, Range rightRange) 285 { 286 if (leftRange.last + 1 >= rightRange.first) { 287 final int last = Math.max(rightRange.last, leftRange.last); 288 return Collections.singletonList(new Range(leftRange.first, last)); 289 } else { 290 final List<Range> result = new ArrayList<Range>(2); 291 result.add(leftRange); 292 result.add(rightRange); 293 return result; 294 } 295 } 296 } 297 builder()298 public static Builder builder() { 299 return new Builder(); 300 } 301 contains(final int i)302 public boolean contains(final int i) 303 { 304 if (mostSignificantGap != null && mostSignificantGap.contains(i)) { 305 return false; 306 } 307 308 final Range searchRange = new Range(i); 309 int idx = Arrays.binarySearch(ranges, searchRange, CONTAINS_COMPARATOR); 310 return idx >= 0; 311 } 312 containsAnyCodePoint(final CharSequence text)313 public boolean containsAnyCodePoint(final CharSequence text) { 314 final Range inputRange = createTextRange(text); 315 return containsAnyCodePoint(text, inputRange); 316 } 317 containsAnyCodePoint(final CharSequence text, final Range inputRange)318 public boolean containsAnyCodePoint(final CharSequence text, 319 final Range inputRange) { 320 final int len = text.length(); 321 if (len == 0) 322 { 323 return false; 324 } 325 326 if (mostSignificantGap != null 327 && mostSignificantGap.contains(inputRange.first) 328 && mostSignificantGap.contains(inputRange.last)) { 329 return false; 330 } 331 332 // if found, returns the index, otherwise "-insertionPoint - 1" 333 final int idxEnd = 334 Arrays.binarySearch(ranges, new Range(inputRange.last), CONTAINS_COMPARATOR); 335 // search for start in "head" range only (likely small) 336 final int startFromIdx = 0; 337 final int startEndIdx = idxEnd >= 0 ? idxEnd + 1 : -(idxEnd + 1); 338 final int idxStart = 339 Arrays.binarySearch(ranges, startFromIdx, startEndIdx, 340 new Range(inputRange.first), CONTAINS_COMPARATOR); 341 342 // If whole range in text outside same non-contained range, won't be found 343 // If whole range in text inside single contained range, must match 344 if (idxStart == idxEnd) 345 { 346 return idxStart >= 0; 347 } 348 349 // if start or end inside contained range, match 350 if (idxStart >= 0 || idxEnd >= 0) 351 { 352 return true; 353 } 354 355 // text spans across multiple ranges of set, need to search individual chars 356 final int searchStart = -idxStart + 1; 357 final int searchEnd = -idxEnd + 1; 358 359 for (int i = 0; i < len; ) 360 { 361 final int cp = Character.codePointAt(text, i); 362 i += Character.charCount(cp); 363 final int idx = Arrays.binarySearch(ranges, searchStart, searchEnd, new Range(cp), CONTAINS_COMPARATOR); 364 if (idx > 0) 365 { 366 return true; 367 } 368 } 369 return false; 370 } 371 372 /** 373 * Returns the range of the input or {@code all-inclusive range} if input is empty 374 * @param text input text 375 * @return range of input, or {@code all-inclusive} if empty input 376 */ createTextRange(final CharSequence text)377 public static Range createTextRange(final CharSequence text) 378 { 379 final int len = text.length(); 380 if (len == 0) { 381 return new Range(Integer.MIN_VALUE, Integer.MAX_VALUE); 382 } 383 384 int minCodePoint = Integer.MAX_VALUE; 385 int maxCodePoint = Integer.MIN_VALUE; 386 for (int i = 0; i < len; ) 387 { 388 final int cp = Character.codePointAt(text, i); 389 minCodePoint = Math.min(minCodePoint, cp); 390 maxCodePoint = Math.max(maxCodePoint, cp); 391 i += Character.charCount(cp); 392 } 393 return new Range(minCodePoint, maxCodePoint); 394 } 395 396 @Override toString()397 public String toString() 398 { 399 return "RangeSet{" + 400 "ranges=" + Arrays.asList(ranges) + 401 ", mostSignificantGap=" + mostSignificantGap + 402 '}'; 403 } 404 } 405