1 /* Copyright (C) 2004-2016 Free Software Foundation, Inc.
2    Author: Oliver Hitz
3 
4    This file is part of GNU Libidn.
5 
6    GNU Libidn is free software: you can redistribute it and/or
7    modify it under the terms of either:
8 
9      * the GNU Lesser General Public License as published by the Free
10        Software Foundation; either version 3 of the License, or (at
11        your option) any later version.
12 
13    or
14 
15      * the GNU General Public License as published by the Free
16        Software Foundation; either version 2 of the License, or (at
17        your option) any later version.
18 
19    or both in parallel, as here.
20 
21    GNU Libidn is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received copies of the GNU General Public License and
27    the GNU Lesser General Public License along with this program.  If
28    not, see <http://www.gnu.org/licenses/>. */
29 
30 package gnu.inet.encoding;
31 
32 import java.util.Arrays;
33 
34 /**
35  * This class offers static methods for preparing internationalized
36  * strings. It supports the following stringprep profiles:
37  * <ul>
38  * <li>RFC3491 nameprep
39  * <li>RFC3920 XMPP nodeprep and resourceprep
40  * </ul>
41  * Note that this implementation only supports 16-bit Unicode code
42  * points.
43  */
44 public class Stringprep
45 {
46   private static final RangeSet.Range[] NODEPREP_PASSTHROUGH_RANGES =
47 	  new RangeSet.Range[] { new RangeSet.Range(0x5B, 0x7E),
48 				 new RangeSet.Range(0x30, 0x39),
49 				 new RangeSet.Range(0x28, 0x2E)};
50 
51   private static final RangeSet.Range[] NAMEPREP_PASSTHROUGH_RANGES =
52 	  new RangeSet.Range[] { new RangeSet.Range(0x5B, 0x7F),
53 				 new RangeSet.Range(0x00, 0x40)};
54 
55   private static final RangeSet.Range[] RESOURCEPREP_PASSTHROUGH_RANGES =
56 	  new RangeSet.Range[] { new RangeSet.Range(0x20, 0x7E)};
57 
58 
59   private static final RangeSet RANGE_A1 =
60 	  RangeSet.builder().addRanges(RFC3454.A1)
61 		  .build();
62 
63   private static final RangeSet RANGE_B1 =
64 	  RangeSet.builder().addRanges(RFC3454.B1)
65 		  .build();
66 
67   private static final RangeSet RANGE_D1 =
68 	  RangeSet.builder().addRanges(RFC3454.D1)
69 		  .build();
70 
71   private static final RangeSet RANGE_D2 =
72 	  RangeSet.builder().addRanges(RFC3454.D2)
73 		  .build();
74 
75 
76   private static final RangeSet RANGE_C3_to_C8_C12_C22 =
77 	  RangeSet.builder().addRanges(RFC3454.C12)
78 		  .addRanges(RFC3454.C22)
79 		  .addRanges(RFC3454.C3)
80 		  .addRanges(RFC3454.C4)
81 		  .addRanges(RFC3454.C5)
82 		  .addRanges(RFC3454.C6)
83 		  .addRanges(RFC3454.C7)
84 		  .addRanges(RFC3454.C8)
85 		  // TODO Add C9 table now, proper unicode support now
86 		  // Temporary rejection of all "unsupported" in java 1.4
87 		  .addRange(new RangeSet.Range(0xffff, 0x10ffff))
88 		  .build();
89 
90   /**
91    * Characters prohibited by RFC3920 nodeprep that aren't defined as
92    * part of the RFC3454 tables.
93    */
94   private static final char [] RFC3920_NODEPREP_PROHIBIT = new char [] {
95 	  '\u0022', '\u0026', '\'',     '\u002F',
96 	  '\u003A', '\u003C', '\u003E', '\u0040'
97   };
98 
99   private static final RangeSet RANGE_C3_TO_C8_C11_12_21_22_NP_PROHIB =
100 	  RangeSet.builder().addRanges(RFC3454.C3)
101 		  .addRanges(RFC3454.C4)
102 		  .addRanges(RFC3454.C5)
103 		  .addRanges(RFC3454.C6)
104 		  .addRanges(RFC3454.C7)
105 		  .addRanges(RFC3454.C8)
106 		  .addRanges(RFC3454.C11)
107 		  .addRanges(RFC3454.C12)
108 		  .addRanges(RFC3454.C21)
109 		  .addRanges(RFC3454.C22)
110 		  .addRanges(RFC3920_NODEPREP_PROHIBIT)
111 		  // TODO Add C9 table now, proper unicode support now
112 		  // Temporary rejection of all "unsupported" in java 1.4
113 		  .addRange(new RangeSet.Range(0xffff, 0x10ffff))
114 		  .build();
115 
116   private static final RangeSet RANGE_C3_to_C8_C12_C21_C22 =
117 	  RangeSet.builder().addRanges(RFC3454.C12)
118 		  .addRanges(RFC3454.C21)
119 		  .addRanges(RFC3454.C22)
120 		  .addRanges(RFC3454.C3)
121 		  .addRanges(RFC3454.C4)
122 		  .addRanges(RFC3454.C5)
123 		  .addRanges(RFC3454.C6)
124 		  .addRanges(RFC3454.C7)
125 		  .addRanges(RFC3454.C8)
126 		  // TODO Add C9 table now, proper unicode support now
127 		  // Temporary rejection of all "unsupported" in java 1.4
128 		  .addRange(new RangeSet.Range(0xffff, 0x10ffff))
129 		  .build();
130 
131 
132   /**
133    * Preps a name according to the Stringprep profile defined in
134    * RFC3491. Unassigned code points are not allowed.
135    *
136    * @param input the name to prep.
137    * @return the prepped name.
138    * @throws StringprepException If the name cannot be prepped with
139    * this profile.
140    * @throws NullPointerException If the name is null.
141    */
nameprep(String input)142   public static String nameprep(String input)
143     throws StringprepException,
144 	   NullPointerException
145   {
146     return nameprep(input, false);
147   }
148 
149   /**
150    * Preps a name according to the Stringprep profile defined in
151    * RFC3491.
152    *
153    * @param input the name to prep.
154    * @param allowUnassigned true if the name may contain unassigned
155    * code points.
156    * @return the prepped name.
157    * @throws StringprepException If the name cannot be prepped with
158    * this profile.
159    * @throws NullPointerException If the name is null.
160    */
nameprep(String input, boolean allowUnassigned)161   public static String nameprep(String input, boolean allowUnassigned)
162     throws StringprepException,
163 	   NullPointerException
164   {
165     if (input == null) {
166       throw new NullPointerException();
167     }
168 
169     final RangeSet.Range inputRange = RangeSet.createTextRange(input);
170     if (onlyPassThrough(NAMEPREP_PASSTHROUGH_RANGES, inputRange)) {
171       return input;
172     }
173     if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input, inputRange)) {
174       throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED);
175     }
176 
177     StringBuilder s = new StringBuilder(input);
178 
179     filter(s, RANGE_B1);
180     map(s, RFC3454.B2search, RFC3454.B2replace);
181 
182     s = new StringBuilder(NFKC.normalizeNFKC(s.toString()));
183     final RangeSet.Range normalizedRange = RangeSet.createTextRange(s);
184     // B.3 is only needed if NFKC is not used, right?
185     // map(s, RFC3454.B3search, RFC3454.B3replace);
186     if (RANGE_C3_to_C8_C12_C22.containsAnyCodePoint(s, normalizedRange)) {
187       // Table C.9 only contains code points > 0xFFFF which Java
188       // doesn't handle
189       throw new StringprepException(StringprepException.CONTAINS_PROHIBITED);
190     }
191 
192     // Bidi handling
193     boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange);
194     boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange);
195 
196     // RFC 3454, section 6, requirement 1: already handled above (table C.8)
197 
198     // RFC 3454, section 6, requirement 2
199     if (r && l) {
200       throw new StringprepException(StringprepException.BIDI_BOTHRAL);
201     }
202 
203     // RFC 3454, section 6, requirement 3
204     if (r) {
205       if (!RANGE_D1.contains(s.charAt(0)) ||
206 	  !RANGE_D1.contains(s.charAt(s.length()-1))) {
207 	throw new StringprepException(StringprepException.BIDI_LTRAL);
208       }
209     }
210 
211     return s.toString();
212   }
213 
214   /**
215    * Preps a node name according to the Stringprep profile defined in
216    * RFC3920. Unassigned code points are not allowed.
217    *
218    * @param input the node name to prep.
219    * @return the prepped node name.
220    * @throws StringprepException If the node name cannot be prepped
221    * with this profile.
222    * @throws NullPointerException If the node name is null.
223    */
nodeprep(String input)224   public static String nodeprep(String input)
225     throws StringprepException,
226 	   NullPointerException
227   {
228     return nodeprep(input, false);
229   }
230 
231   /**
232    * Preps a node name according to the Stringprep profile defined in
233    * RFC3920.
234    *
235    * @param input the node name to prep.
236    * @param allowUnassigned true if the node name may contain
237    * unassigned code points.
238    * @return the prepped node name.
239    * @throws StringprepException If the node name cannot be prepped
240    * with this profile.
241    * @throws NullPointerException If the node name is null.
242    */
nodeprep(String input, boolean allowUnassigned)243   public static String nodeprep(String input, boolean allowUnassigned)
244     throws StringprepException,
245 	   NullPointerException
246   {
247     if (input == null) {
248       throw new NullPointerException();
249     }
250 
251     final RangeSet.Range inputRange = RangeSet.createTextRange(input);
252     if (onlyPassThrough(NODEPREP_PASSTHROUGH_RANGES, inputRange)) {
253       return input;
254     }
255     if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input, inputRange)) {
256       throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED);
257     }
258 
259     StringBuilder s = new StringBuilder(input);
260 
261     filter(s, RANGE_B1);
262     map(s, RFC3454.B2search, RFC3454.B2replace);
263 
264     s = new StringBuilder(NFKC.normalizeNFKC(s.toString()));
265     final RangeSet.Range normalizedRange = RangeSet.createTextRange(s);
266     if (RANGE_C3_TO_C8_C11_12_21_22_NP_PROHIB.containsAnyCodePoint(s, normalizedRange))
267     {
268       throw new StringprepException(StringprepException.CONTAINS_PROHIBITED);
269     }
270 
271     // Bidi handling
272     boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange);
273     boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange);
274 
275     // RFC 3454, section 6, requirement 1: already handled above (table C.8)
276 
277     // RFC 3454, section 6, requirement 2
278     if (r && l) {
279       throw new	StringprepException(StringprepException.BIDI_BOTHRAL);
280     }
281 
282     // RFC 3454, section 6, requirement 3
283     if (r) {
284       if (!RANGE_D1.contains(s.charAt(0)) ||
285 	  !RANGE_D1.contains(s.charAt(s.length() - 1))) {
286 	throw new StringprepException(StringprepException.BIDI_LTRAL);
287       }
288     }
289 
290     return s.toString();
291   }
292 
293   /**
294    * Preps a resource name according to the Stringprep profile defined
295    * in RFC3920. Unassigned code points are not allowed.
296    *
297    * @param input the resource name to prep.
298    * @return the prepped node name.
299    * @throws StringprepException If the resource name cannot be prepped
300    * with this profile.
301    * @throws NullPointerException If the resource name is null.
302    */
resourceprep(String input)303   public static String resourceprep(String input)
304     throws StringprepException,
305 	   NullPointerException
306   {
307     return resourceprep(input, false);
308   }
309 
310   /**
311    * Preps a resource name according to the Stringprep profile defined
312    * in RFC3920.
313    *
314    * @param input the resource name to prep.
315    * @param allowUnassigned true if the resource name may contain
316    * unassigned code points.
317    * @return the prepped node name.
318    * @throws StringprepException If the resource name cannot be prepped
319    * with this profile.
320    * @throws NullPointerException If the resource name is null.
321    */
resourceprep(String input, boolean allowUnassigned)322   public static String resourceprep(String input, boolean allowUnassigned)
323     throws StringprepException,
324 	   NullPointerException
325   {
326     if (input == null) {
327       throw new NullPointerException();
328     }
329 
330     final RangeSet.Range inputRange = RangeSet.createTextRange(input);
331     if (onlyPassThrough(RESOURCEPREP_PASSTHROUGH_RANGES, inputRange)) {
332       return input;
333     }
334     if (!allowUnassigned && RANGE_A1.containsAnyCodePoint(input)) {
335       throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED);
336     }
337 
338     StringBuilder s = new StringBuilder(input);
339 
340     filter(s, RANGE_B1);
341 
342     s = new StringBuilder(NFKC.normalizeNFKC(s.toString()));
343     final RangeSet.Range normalizedRange = RangeSet.createTextRange(s);
344 
345     if (RANGE_C3_to_C8_C12_C21_C22.containsAnyCodePoint(s, normalizedRange)) {
346       // Table C.9 only contains code points > 0xFFFF which Java
347       // doesn't handle
348 
349       throw new StringprepException(StringprepException.CONTAINS_PROHIBITED);
350     }
351 
352     // Bidi handling
353     boolean r = RANGE_D1.containsAnyCodePoint(s, normalizedRange);
354     boolean l = RANGE_D2.containsAnyCodePoint(s, normalizedRange);
355 
356     // RFC 3454, section 6, requirement 1: already handled above (table C.8)
357 
358     // RFC 3454, section 6, requirement 2
359     if (r && l) {
360       throw new	StringprepException(StringprepException.BIDI_BOTHRAL);
361     }
362 
363     // RFC 3454, section 6, requirement 3
364     if (r) {
365       if (!RANGE_D1.contains(s.charAt(0)) ||
366 	  !RANGE_D1.contains(s.charAt(s.length() - 1))) {
367 	throw new StringprepException(StringprepException.BIDI_LTRAL);
368       }
369     }
370 
371     return s.toString();
372   }
373 
onlyPassThrough(final RangeSet.Range[] passThroughs, final RangeSet.Range inputRange)374   private static boolean onlyPassThrough(final RangeSet.Range[] passThroughs,
375 					 final RangeSet.Range inputRange) {
376     for (final RangeSet.Range passThrough : passThroughs) {
377       if (passThrough.contains(inputRange)) {
378 	return true;
379       }
380     }
381     return false;
382   }
383 
filter(StringBuilder s, RangeSet f)384   static void filter(StringBuilder s, RangeSet f)
385   {
386     for (int j = 0; j < s.length(); ) {
387       if (f.contains(s.charAt(j))) {
388 	s.deleteCharAt(j);
389       } else {
390 	j++;
391       }
392     }
393   }
394 
map(StringBuilder s, char[] search, String[] replace)395   static void map(StringBuilder s, char[] search, String[] replace)
396   {
397     for (int i = 0; i < s.length(); i++) {
398       char c = s.charAt(i);
399       int mapIndex = Arrays.binarySearch(search, c);
400       if (mapIndex >= 0) {
401 	String replacement = replace[mapIndex];
402 	s.replace(i, i + 1, replacement);
403 	i += replacement.length() - 1;
404       }
405     }
406   }
407 }
408