1 /* Copyright (C) 2004-2016 Free Software Foundation, Inc. 2 Author: Oliver Hitz 3 4 This file is part of GNU Libidn. 5 6 GNU Libidn is free software: you can redistribute it and/or 7 modify it under the terms of either: 8 9 * the GNU Lesser General Public License as published by the Free 10 Software Foundation; either version 3 of the License, or (at 11 your option) any later version. 12 13 or 14 15 * the GNU General Public License as published by the Free 16 Software Foundation; either version 2 of the License, or (at 17 your option) any later version. 18 19 or both in parallel, as here. 20 21 GNU Libidn is distributed in the hope that it will be useful, 22 but WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 General Public License for more details. 25 26 You should have received copies of the GNU General Public License and 27 the GNU Lesser General Public License along with this program. If 28 not, see <http://www.gnu.org/licenses/>. */ 29 30 package gnu.inet.encoding; 31 32 import java.util.Arrays; 33 34 /** 35 * This class offers static methods for preparing internationalized 36 * strings. It supports the following stringprep profiles: 37 * <ul> 38 * <li>RFC3491 nameprep 39 * <li>RFC3920 XMPP nodeprep and resourceprep 40 * </ul> 41 * Note that this implementation only supports 16-bit Unicode code 42 * points. 43 */ 44 public class Stringprep 45 { 46 private static final RangeSet.Range[] NODEPREP_PASSTHROUGH_RANGES = 47 new RangeSet.Range[] { new RangeSet.Range(0x5B, 0x7E), 48 new RangeSet.Range(0x30, 0x39), 49 new RangeSet.Range(0x28, 0x2E)}; 50 51 private static final RangeSet.Range[] NAMEPREP_PASSTHROUGH_RANGES = 52 new RangeSet.Range[] { new RangeSet.Range(0x5B, 0x7F), 53 new RangeSet.Range(0x00, 0x40)}; 54 55 private static final RangeSet.Range[] RESOURCEPREP_PASSTHROUGH_RANGES = 56 new RangeSet.Range[] { new RangeSet.Range(0x20, 0x7E)}; 57 58 59 private static final RangeSet RANGE_A1 = 60 RangeSet.builder().addRanges(RFC3454.A1) 61 .build(); 62 63 private static final RangeSet RANGE_B1 = 64 RangeSet.builder().addRanges(RFC3454.B1) 65 .build(); 66 67 private static final RangeSet RANGE_D1 = 68 RangeSet.builder().addRanges(RFC3454.D1) 69 .build(); 70 71 private static final RangeSet RANGE_D2 = 72 RangeSet.builder().addRanges(RFC3454.D2) 73 .build(); 74 75 76 private static final RangeSet RANGE_C3_to_C8_C12_C22 = 77 RangeSet.builder().addRanges(RFC3454.C12) 78 .addRanges(RFC3454.C22) 79 .addRanges(RFC3454.C3) 80 .addRanges(RFC3454.C4) 81 .addRanges(RFC3454.C5) 82 .addRanges(RFC3454.C6) 83 .addRanges(RFC3454.C7) 84 .addRanges(RFC3454.C8) 85 // TODO Add C9 table now, proper unicode support now 86 // Temporary rejection of all "unsupported" in java 1.4 87 .addRange(new RangeSet.Range(0xffff, 0x10ffff)) 88 .build(); 89 90 /** 91 * Characters prohibited by RFC3920 nodeprep that aren't defined as 92 * part of the RFC3454 tables. 93 */ 94 private static final char [] RFC3920_NODEPREP_PROHIBIT = new char [] { 95 '\u0022', '\u0026', '\'', '\u002F', 96 '\u003A', '\u003C', '\u003E', '\u0040' 97 }; 98 99 private static final RangeSet RANGE_C3_TO_C8_C11_12_21_22_NP_PROHIB = 100 RangeSet.builder().addRanges(RFC3454.C3) 101 .addRanges(RFC3454.C4) 102 .addRanges(RFC3454.C5) 103 .addRanges(RFC3454.C6) 104 .addRanges(RFC3454.C7) 105 .addRanges(RFC3454.C8) 106 .addRanges(RFC3454.C11) 107 .addRanges(RFC3454.C12) 108 .addRanges(RFC3454.C21) 109 .addRanges(RFC3454.C22) 110 .addRanges(RFC3920_NODEPREP_PROHIBIT) 111 // TODO Add C9 table now, proper unicode support now 112 // Temporary rejection of all "unsupported" in java 1.4 113 .addRange(new RangeSet.Range(0xffff, 0x10ffff)) 114 .build(); 115 116 private static final RangeSet RANGE_C3_to_C8_C12_C21_C22 = 117 RangeSet.builder().addRanges(RFC3454.C12) 118 .addRanges(RFC3454.C21) 119 .addRanges(RFC3454.C22) 120 .addRanges(RFC3454.C3) 121 .addRanges(RFC3454.C4) 122 .addRanges(RFC3454.C5) 123 .addRanges(RFC3454.C6) 124 .addRanges(RFC3454.C7) 125 .addRanges(RFC3454.C8) 126 // TODO Add C9 table now, proper unicode support now 127 // Temporary rejection of all "unsupported" in java 1.4 128 .addRange(new RangeSet.Range(0xffff, 0x10ffff)) 129 .build(); 130 131 132 /** 133 * Preps a name according to the Stringprep profile defined in 134 * RFC3491. Unassigned code points are not allowed. 135 * 136 * @param input the name to prep. 137 * @return the prepped name. 138 * @throws StringprepException If the name cannot be prepped with 139 * this profile. 140 * @throws NullPointerException If the name is null. 141 */ nameprep(String input)142 public static String nameprep(String input) 143 throws StringprepException, 144 NullPointerException 145 { 146 return nameprep(input, false); 147 } 148 149 /** 150 * Preps a name according to the Stringprep profile defined in 151 * RFC3491. 152 * 153 * @param input the name to prep. 154 * @param allowUnassigned true if the name may contain unassigned 155 * code points. 156 * @return the prepped name. 157 * @throws StringprepException If the name cannot be prepped with 158 * this profile. 159 * @throws NullPointerException If the name is null. 160 */ nameprep(String input, boolean allowUnassigned)161 public static String nameprep(String input, boolean allowUnassigned) 162 throws StringprepException, 163 NullPointerException 164 { 165 if (input == null) { 166 throw new NullPointerException(); 167 } 168 169 final RangeSet.Range inputRange = RangeSet.createTextRange(input); 170 if (onlyPassThrough(NAMEPREP_PASSTHROUGH_RANGES, inputRange)) { 171 return input; 172 } 173 if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input, inputRange)) { 174 throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED); 175 } 176 177 StringBuilder s = new StringBuilder(input); 178 179 filter(s, RANGE_B1); 180 map(s, RFC3454.B2search, RFC3454.B2replace); 181 182 s = new StringBuilder(NFKC.normalizeNFKC(s.toString())); 183 final RangeSet.Range normalizedRange = RangeSet.createTextRange(s); 184 // B.3 is only needed if NFKC is not used, right? 185 // map(s, RFC3454.B3search, RFC3454.B3replace); 186 if (RANGE_C3_to_C8_C12_C22.containsAnyCodePoint(s, normalizedRange)) { 187 // Table C.9 only contains code points > 0xFFFF which Java 188 // doesn't handle 189 throw new StringprepException(StringprepException.CONTAINS_PROHIBITED); 190 } 191 192 // Bidi handling 193 boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange); 194 boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange); 195 196 // RFC 3454, section 6, requirement 1: already handled above (table C.8) 197 198 // RFC 3454, section 6, requirement 2 199 if (r && l) { 200 throw new StringprepException(StringprepException.BIDI_BOTHRAL); 201 } 202 203 // RFC 3454, section 6, requirement 3 204 if (r) { 205 if (!RANGE_D1.contains(s.charAt(0)) || 206 !RANGE_D1.contains(s.charAt(s.length()-1))) { 207 throw new StringprepException(StringprepException.BIDI_LTRAL); 208 } 209 } 210 211 return s.toString(); 212 } 213 214 /** 215 * Preps a node name according to the Stringprep profile defined in 216 * RFC3920. Unassigned code points are not allowed. 217 * 218 * @param input the node name to prep. 219 * @return the prepped node name. 220 * @throws StringprepException If the node name cannot be prepped 221 * with this profile. 222 * @throws NullPointerException If the node name is null. 223 */ nodeprep(String input)224 public static String nodeprep(String input) 225 throws StringprepException, 226 NullPointerException 227 { 228 return nodeprep(input, false); 229 } 230 231 /** 232 * Preps a node name according to the Stringprep profile defined in 233 * RFC3920. 234 * 235 * @param input the node name to prep. 236 * @param allowUnassigned true if the node name may contain 237 * unassigned code points. 238 * @return the prepped node name. 239 * @throws StringprepException If the node name cannot be prepped 240 * with this profile. 241 * @throws NullPointerException If the node name is null. 242 */ nodeprep(String input, boolean allowUnassigned)243 public static String nodeprep(String input, boolean allowUnassigned) 244 throws StringprepException, 245 NullPointerException 246 { 247 if (input == null) { 248 throw new NullPointerException(); 249 } 250 251 final RangeSet.Range inputRange = RangeSet.createTextRange(input); 252 if (onlyPassThrough(NODEPREP_PASSTHROUGH_RANGES, inputRange)) { 253 return input; 254 } 255 if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input, inputRange)) { 256 throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED); 257 } 258 259 StringBuilder s = new StringBuilder(input); 260 261 filter(s, RANGE_B1); 262 map(s, RFC3454.B2search, RFC3454.B2replace); 263 264 s = new StringBuilder(NFKC.normalizeNFKC(s.toString())); 265 final RangeSet.Range normalizedRange = RangeSet.createTextRange(s); 266 if (RANGE_C3_TO_C8_C11_12_21_22_NP_PROHIB.containsAnyCodePoint(s, normalizedRange)) 267 { 268 throw new StringprepException(StringprepException.CONTAINS_PROHIBITED); 269 } 270 271 // Bidi handling 272 boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange); 273 boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange); 274 275 // RFC 3454, section 6, requirement 1: already handled above (table C.8) 276 277 // RFC 3454, section 6, requirement 2 278 if (r && l) { 279 throw new StringprepException(StringprepException.BIDI_BOTHRAL); 280 } 281 282 // RFC 3454, section 6, requirement 3 283 if (r) { 284 if (!RANGE_D1.contains(s.charAt(0)) || 285 !RANGE_D1.contains(s.charAt(s.length() - 1))) { 286 throw new StringprepException(StringprepException.BIDI_LTRAL); 287 } 288 } 289 290 return s.toString(); 291 } 292 293 /** 294 * Preps a resource name according to the Stringprep profile defined 295 * in RFC3920. Unassigned code points are not allowed. 296 * 297 * @param input the resource name to prep. 298 * @return the prepped node name. 299 * @throws StringprepException If the resource name cannot be prepped 300 * with this profile. 301 * @throws NullPointerException If the resource name is null. 302 */ resourceprep(String input)303 public static String resourceprep(String input) 304 throws StringprepException, 305 NullPointerException 306 { 307 return resourceprep(input, false); 308 } 309 310 /** 311 * Preps a resource name according to the Stringprep profile defined 312 * in RFC3920. 313 * 314 * @param input the resource name to prep. 315 * @param allowUnassigned true if the resource name may contain 316 * unassigned code points. 317 * @return the prepped node name. 318 * @throws StringprepException If the resource name cannot be prepped 319 * with this profile. 320 * @throws NullPointerException If the resource name is null. 321 */ resourceprep(String input, boolean allowUnassigned)322 public static String resourceprep(String input, boolean allowUnassigned) 323 throws StringprepException, 324 NullPointerException 325 { 326 if (input == null) { 327 throw new NullPointerException(); 328 } 329 330 final RangeSet.Range inputRange = RangeSet.createTextRange(input); 331 if (onlyPassThrough(RESOURCEPREP_PASSTHROUGH_RANGES, inputRange)) { 332 return input; 333 } 334 if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input)) { 335 throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED); 336 } 337 338 StringBuilder s = new StringBuilder(input); 339 340 filter(s, RANGE_B1); 341 342 s = new StringBuilder(NFKC.normalizeNFKC(s.toString())); 343 final RangeSet.Range normalizedRange = RangeSet.createTextRange(s); 344 345 if (RANGE_C3_to_C8_C12_C21_C22.containsAnyCodePoint(s, normalizedRange)) { 346 // Table C.9 only contains code points > 0xFFFF which Java 347 // doesn't handle 348 349 throw new StringprepException(StringprepException.CONTAINS_PROHIBITED); 350 } 351 352 // Bidi handling 353 boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange); 354 boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange); 355 356 // RFC 3454, section 6, requirement 1: already handled above (table C.8) 357 358 // RFC 3454, section 6, requirement 2 359 if (r && l) { 360 throw new StringprepException(StringprepException.BIDI_BOTHRAL); 361 } 362 363 // RFC 3454, section 6, requirement 3 364 if (r) { 365 if (!RANGE_D1.contains(s.charAt(0)) || 366 !RANGE_D1.contains(s.charAt(s.length() - 1))) { 367 throw new StringprepException(StringprepException.BIDI_LTRAL); 368 } 369 } 370 371 return s.toString(); 372 } 373 onlyPassThrough(final RangeSet.Range[] passThroughs, final RangeSet.Range inputRange)374 private static boolean onlyPassThrough(final RangeSet.Range[] passThroughs, 375 final RangeSet.Range inputRange) { 376 for (final RangeSet.Range passThrough : passThroughs) { 377 if (passThrough.contains(inputRange)) { 378 return true; 379 } 380 } 381 return false; 382 } 383 filter(StringBuilder s, RangeSet f)384 static void filter(StringBuilder s, RangeSet f) 385 { 386 for (int j = 0; j < s.length(); ) { 387 if (f.contains(s.charAt(j))) { 388 s.deleteCharAt(j); 389 } else { 390 j++; 391 } 392 } 393 } 394 map(StringBuilder s, char[] search, String[] replace)395 static void map(StringBuilder s, char[] search, String[] replace) 396 { 397 for (int i = 0; i < s.length(); i++) { 398 char c = s.charAt(i); 399 int mapIndex = Arrays.binarySearch(search, c); 400 if (mapIndex >= 0) { 401 String replacement = replace[mapIndex]; 402 s.replace(i, i + 1, replacement); 403 i += replacement.length() - 1; 404 } 405 } 406 } 407 } 408