1 /*
2  * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.27 2004/05/05 20:34:01 olegk Exp $
3  * $Revision: 507321 $
4  * $Date: 2007-02-14 01:10:51 +0100 (Wed, 14 Feb 2007) $
5  *
6  * ====================================================================
7  *
8  *  Licensed to the Apache Software Foundation (ASF) under one or more
9  *  contributor license agreements.  See the NOTICE file distributed with
10  *  this work for additional information regarding copyright ownership.
11  *  The ASF licenses this file to You under the Apache License, Version 2.0
12  *  (the "License"); you may not use this file except in compliance with
13  *  the License.  You may obtain a copy of the License at
14  *
15  *      http://www.apache.org/licenses/LICENSE-2.0
16  *
17  *  Unless required by applicable law or agreed to in writing, software
18  *  distributed under the License is distributed on an "AS IS" BASIS,
19  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  *  See the License for the specific language governing permissions and
21  *  limitations under the License.
22  * ====================================================================
23  *
24  * This software consists of voluntary contributions made by many
25  * individuals on behalf of the Apache Software Foundation.  For more
26  * information on the Apache Software Foundation, please see
27  * <http://www.apache.org/>.
28  *
29  */
30 
31 package org.apache.commons.httpclient.util;
32 
33 import java.util.BitSet;
34 
35 import org.apache.commons.codec.DecoderException;
36 import org.apache.commons.codec.net.URLCodec;
37 import org.apache.commons.httpclient.URI;
38 import org.apache.commons.httpclient.URIException;
39 
40 /**
41  * The URI escape and character encoding and decoding utility.
42  * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
43  * than {@link org.apache.commons.httpclient.URI}.
44  *
45  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
46  * @version $Revision: 507321 $ $Date: 2002/03/14 15:14:01
47  */
48 public class URIUtil {
49 
    // -------------------------------------------------------- Class constants

    /**
     * A bit set created without setting any bit.  <code>encodeAll</code>
     * passes it as the "allowed" set, so no character is exempted from
     * escaping there.
     */
    protected static final BitSet empty = new BitSet(1);
53 
54     // ---------------------------------------------------------- URI utilities
55 
56     /**
57      * Get the basename of an URI.   It's possibly an empty string.
58      *
59      * @param uri a string regarded an URI
60      * @return the basename string; an empty string if the path ends with slash
61      */
getName(String uri)62     public static String getName(String uri) {
63         if (uri == null || uri.length() == 0) { return uri; }
64         String path = URIUtil.getPath(uri);
65         int at = path.lastIndexOf("/");
66         int to = path.length();
67         return (at >= 0) ? path.substring(at + 1, to) : path;
68     }
69 
70 
71     /**
72      * Get the query of an URI.
73      *
74      * @param uri a string regarded an URI
75      * @return the query string; <code>null</code> if empty or undefined
76      */
getQuery(String uri)77     public static String getQuery(String uri) {
78         if (uri == null || uri.length() == 0) { return null; }
79         // consider of net_path
80         int at = uri.indexOf("//");
81         int from = uri.indexOf(
82             "/",
83             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
84         );
85         // the authority part of URI ignored
86         int to = uri.length();
87         // reuse the at and from variables to consider the query
88         at = uri.indexOf("?", from);
89         if (at >= 0) {
90             from = at + 1;
91         } else {
92             return null;
93         }
94         // check the fragment
95         if (uri.lastIndexOf("#") > from) {
96             to = uri.lastIndexOf("#");
97         }
98         // get the path and query.
99         return (from < 0 || from == to) ? null : uri.substring(from, to);
100     }
101 
102 
103     /**
104      * Get the path of an URI.
105      *
106      * @param uri a string regarded an URI
107      * @return the path string
108      */
getPath(String uri)109     public static String getPath(String uri) {
110         if (uri == null) {
111             return null;
112         }
113         // consider of net_path
114         int at = uri.indexOf("//");
115         int from = uri.indexOf(
116             "/",
117             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
118         );
119         // the authority part of URI ignored
120         int to = uri.length();
121         // check the query
122         if (uri.indexOf('?', from) != -1) {
123             to = uri.indexOf('?', from);
124         }
125         // check the fragment
126         if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
127             to = uri.lastIndexOf("#");
128         }
129         // get only the path.
130         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
131     }
132 
133 
134     /**
135      * Get the path and query of an URI.
136      *
137      * @param uri a string regarded an URI
138      * @return the path and query string
139      */
getPathQuery(String uri)140     public static String getPathQuery(String uri) {
141         if (uri == null) {
142             return null;
143         }
144         // consider of net_path
145         int at = uri.indexOf("//");
146         int from = uri.indexOf(
147             "/",
148             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
149         );
150         // the authority part of URI ignored
151         int to = uri.length();
152         // Ignore the '?' mark so to ignore the query.
153         // check the fragment
154         if (uri.lastIndexOf("#") > from) {
155             to = uri.lastIndexOf("#");
156         }
157         // get the path and query.
158         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
159     }
160 
161 
162     /**
163      * Get the path of an URI and its rest part.
164      *
165      * @param uri a string regarded an URI
166      * @return the string from the path part
167      */
getFromPath(String uri)168     public static String getFromPath(String uri) {
169         if (uri == null) {
170             return null;
171         }
172         // consider of net_path
173         int at = uri.indexOf("//");
174         int from = uri.indexOf(
175             "/",
176             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
177         );
178         // get the path and its rest.
179         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
180     }
181 
182     // ----------------------------------------------------- Encoding utilities
183 
184     /**
185      * Get the all escaped and encoded string with the default protocl charset.
186      * It's the same function to use <code>encode(String unescaped, Bitset
187      * empty, URI.getDefaultProtocolCharset())</code>.
188      *
189      * @param unescaped an unescaped string
190      * @return the escaped string
191      *
192      * @throws URIException if the default protocol charset is not supported
193      *
194      * @see URI#getDefaultProtocolCharset
195      * @see #encode
196      */
encodeAll(String unescaped)197     public static String encodeAll(String unescaped) throws URIException {
198         return encodeAll(unescaped, URI.getDefaultProtocolCharset());
199     }
200 
201 
202     /**
203      * Get the all escaped and encoded string with a given charset.
204      * It's the same function to use <code>encode(String unescaped, Bitset
205      * empty, String charset)</code>.
206      *
207      * @param unescaped an unescaped string
208      * @param charset the charset
209      * @return the escaped string
210      *
211      * @throws URIException if the charset is not supported
212      *
213      * @see #encode
214      */
encodeAll(String unescaped, String charset)215     public static String encodeAll(String unescaped, String charset)
216         throws URIException {
217 
218         return encode(unescaped, empty, charset);
219     }
220 
221 
222     /**
223      * Escape and encode a string regarded as within the authority component of
224      * an URI with the default protocol charset.
225      * Within the authority component, the characters ";", ":", "@", "?", and
226      * "/" are reserved.
227      *
228      * @param unescaped an unescaped string
229      * @return the escaped string
230      *
231      * @throws URIException if the default protocol charset is not supported
232      *
233      * @see URI#getDefaultProtocolCharset
234      * @see #encode
235      */
encodeWithinAuthority(String unescaped)236     public static String encodeWithinAuthority(String unescaped)
237         throws URIException {
238 
239         return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
240     }
241 
242 
243     /**
244      * Escape and encode a string regarded as within the authority component of
245      * an URI with a given charset.
246      * Within the authority component, the characters ";", ":", "@", "?", and
247      * "/" are reserved.
248      *
249      * @param unescaped an unescaped string
250      * @param charset the charset
251      * @return the escaped string
252      *
253      * @throws URIException if the charset is not supported
254      *
255      * @see #encode
256      */
encodeWithinAuthority(String unescaped, String charset)257     public static String encodeWithinAuthority(String unescaped, String charset)
258         throws URIException {
259 
260         return encode(unescaped, URI.allowed_within_authority, charset);
261     }
262 
263 
264     /**
265      * Escape and encode a string regarded as the path and query components of
266      * an URI with the default protocol charset.
267      *
268      * @param unescaped an unescaped string
269      * @return the escaped string
270      *
271      * @throws URIException if the default protocol charset is not supported
272      *
273      * @see URI#getDefaultProtocolCharset
274      * @see #encode
275      */
encodePathQuery(String unescaped)276     public static String encodePathQuery(String unescaped) throws URIException {
277         return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
278     }
279 
280 
281     /**
282      * Escape and encode a string regarded as the path and query components of
283      * an URI with a given charset.
284      *
285      * @param unescaped an unescaped string
286      * @param charset the charset
287      * @return the escaped string
288      *
289      * @throws URIException if the charset is not supported
290      *
291      * @see #encode
292      */
encodePathQuery(String unescaped, String charset)293     public static String encodePathQuery(String unescaped, String charset)
294         throws URIException {
295 
296         int at = unescaped.indexOf('?');
297         if (at < 0) {
298             return encode(unescaped, URI.allowed_abs_path, charset);
299         }
300         // else
301         return  encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
302             + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
303     }
304 
305 
306     /**
307      * Escape and encode a string regarded as within the path component of an
308      * URI with the default protocol charset.
309      * The path may consist of a sequence of path segments separated by a
310      * single slash "/" character.  Within a path segment, the characters
311      * "/", ";", "=", and "?" are reserved.
312      *
313      * @param unescaped an unescaped string
314      * @return the escaped string
315      *
316      * @throws URIException if the default protocol charset is not supported
317      *
318      * @see URI#getDefaultProtocolCharset
319      * @see #encode
320      */
encodeWithinPath(String unescaped)321     public static String encodeWithinPath(String unescaped)
322         throws URIException {
323 
324         return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
325     }
326 
327 
328     /**
329      * Escape and encode a string regarded as within the path component of an
330      * URI with a given charset.
331      * The path may consist of a sequence of path segments separated by a
332      * single slash "/" character.  Within a path segment, the characters
333      * "/", ";", "=", and "?" are reserved.
334      *
335      * @param unescaped an unescaped string
336      * @param charset the charset
337      * @return the escaped string
338      *
339      * @throws URIException if the charset is not supported
340      *
341      * @see #encode
342      */
encodeWithinPath(String unescaped, String charset)343     public static String encodeWithinPath(String unescaped, String charset)
344         throws URIException {
345 
346         return encode(unescaped, URI.allowed_within_path, charset);
347     }
348 
349 
350     /**
351      * Escape and encode a string regarded as the path component of an URI with
352      * the default protocol charset.
353      *
354      * @param unescaped an unescaped string
355      * @return the escaped string
356      *
357      * @throws URIException if the default protocol charset is not supported
358      *
359      * @see URI#getDefaultProtocolCharset
360      * @see #encode
361      */
encodePath(String unescaped)362     public static String encodePath(String unescaped) throws URIException {
363         return encodePath(unescaped, URI.getDefaultProtocolCharset());
364     }
365 
366 
367     /**
368      * Escape and encode a string regarded as the path component of an URI with
369      * a given charset.
370      *
371      * @param unescaped an unescaped string
372      * @param charset the charset
373      * @return the escaped string
374      *
375      * @throws URIException if the charset is not supported
376      *
377      * @see #encode
378      */
encodePath(String unescaped, String charset)379     public static String encodePath(String unescaped, String charset)
380         throws URIException {
381 
382         return encode(unescaped, URI.allowed_abs_path, charset);
383     }
384 
385 
386     /**
387      * Escape and encode a string regarded as within the query component of an
388      * URI with the default protocol charset.
389      * When a query comprise the name and value pairs, it is used in order
390      * to encode each name and value string.  The reserved special characters
391      * within a query component are being included in encoding the query.
392      *
393      * @param unescaped an unescaped string
394      * @return the escaped string
395      *
396      * @throws URIException if the default protocol charset is not supported
397      *
398      * @see URI#getDefaultProtocolCharset
399      * @see #encode
400      */
encodeWithinQuery(String unescaped)401     public static String encodeWithinQuery(String unescaped)
402         throws URIException {
403 
404         return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
405     }
406 
407 
408     /**
409      * Escape and encode a string regarded as within the query component of an
410      * URI with a given charset.
411      * When a query comprise the name and value pairs, it is used in order
412      * to encode each name and value string.  The reserved special characters
413      * within a query component are being included in encoding the query.
414      *
415      * @param unescaped an unescaped string
416      * @param charset the charset
417      * @return the escaped string
418      *
419      * @throws URIException if the charset is not supported
420      *
421      * @see #encode
422      */
encodeWithinQuery(String unescaped, String charset)423     public static String encodeWithinQuery(String unescaped, String charset)
424         throws URIException {
425 
426         return encode(unescaped, URI.allowed_within_query, charset);
427     }
428 
429 
430     /**
431      * Escape and encode a string regarded as the query component of an URI with
432      * the default protocol charset.
433      * When a query string is not misunderstood the reserved special characters
434      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
435      * is recommended to use in encoding the whole query.
436      *
437      * @param unescaped an unescaped string
438      * @return the escaped string
439      *
440      * @throws URIException if the default protocol charset is not supported
441      *
442      * @see URI#getDefaultProtocolCharset
443      * @see #encode
444      */
encodeQuery(String unescaped)445     public static String encodeQuery(String unescaped) throws URIException {
446         return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
447     }
448 
449 
450     /**
451      * Escape and encode a string regarded as the query component of an URI with
452      * a given charset.
453      * When a query string is not misunderstood the reserved special characters
454      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
455      * is recommended to use in encoding the whole query.
456      *
457      * @param unescaped an unescaped string
458      * @param charset the charset
459      * @return the escaped string
460      *
461      * @throws URIException if the charset is not supported
462      *
463      * @see #encode
464      */
encodeQuery(String unescaped, String charset)465     public static String encodeQuery(String unescaped, String charset)
466         throws URIException {
467 
468         return encode(unescaped, URI.allowed_query, charset);
469     }
470 
471 
472     /**
473      * Escape and encode a given string with allowed characters not to be
474      * escaped and the default protocol charset.
475      *
476      * @param unescaped a string
477      * @param allowed allowed characters not to be escaped
478      * @return the escaped string
479      *
480      * @throws URIException if the default protocol charset is not supported
481      *
482      * @see URI#getDefaultProtocolCharset
483      */
encode(String unescaped, BitSet allowed)484     public static String encode(String unescaped, BitSet allowed)
485         throws URIException {
486 
487         return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
488     }
489 
490 
491     /**
492      * Escape and encode a given string with allowed characters not to be
493      * escaped and a given charset.
494      *
495      * @param unescaped a string
496      * @param allowed allowed characters not to be escaped
497      * @param charset the charset
498      * @return the escaped string
499      */
encode(String unescaped, BitSet allowed, String charset)500     public static String encode(String unescaped, BitSet allowed,
501             String charset) throws URIException {
502         byte[] rawdata = URLCodec.encodeUrl(allowed,
503             EncodingUtil.getBytes(unescaped, charset));
504         return EncodingUtil.getAsciiString(rawdata);
505     }
506 
507 
508     /**
509      * Unescape and decode a given string regarded as an escaped string with the
510      * default protocol charset.
511      *
512      * @param escaped a string
513      * @return the unescaped string
514      *
515      * @throws URIException if the string cannot be decoded (invalid)
516      *
517      * @see URI#getDefaultProtocolCharset
518      */
decode(String escaped)519     public static String decode(String escaped) throws URIException {
520         try {
521             byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped));
522             return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset());
523         } catch (DecoderException e) {
524             throw new URIException(e.getMessage());
525         }
526     }
527 
528     /**
529      * Unescape and decode a given string regarded as an escaped string.
530      *
531      * @param escaped a string
532      * @param charset the charset
533      * @return the unescaped string
534      *
535      * @throws URIException if the charset is not supported
536      *
537      * @see Coder#decode
538      */
decode(String escaped, String charset)539     public static String decode(String escaped, String charset)
540         throws URIException {
541 
542         return Coder.decode(escaped.toCharArray(), charset);
543     }
544 
545     // ---------------------------------------------------------- Inner classes
546 
547     /**
548      * The basic and internal utility for URI escape and character encoding and
549      * decoding.
550      *
551      * @deprecated use org.apache.commons.codec.net.URLCodec
552      */
553     protected static class Coder extends URI {
554 
555         /**
556          * Escape and encode a given string with allowed characters not to be
557          * escaped.
558          *
559          * @param unescapedComponent an unescaped component
560          * @param allowed allowed characters not to be escaped
561          * @param charset the charset to encode
562          * @return the escaped and encoded string
563          *
564          * @throws URIException if the charset is not supported
565          *
566          * @deprecated use org.apache.commons.codec.net.URLCodec
567          */
encode(String unescapedComponent, BitSet allowed, String charset)568         public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
569             throws URIException {
570 
571             return URI.encode(unescapedComponent, allowed, charset);
572         }
573 
574 
575         /**
576          * Unescape and decode a given string.
577          *
578          * @param escapedComponent an being-unescaped component
579          * @param charset the charset to decode
580          * @return the escaped and encoded string
581          *
582          * @throws URIException if the charset is not supported
583          *
584          * @deprecated use org.apache.commons.codec.net.URLCodec
585          */
decode(char[] escapedComponent, String charset)586         public static String decode(char[] escapedComponent, String charset)
587             throws URIException {
588 
589             return URI.decode(escapedComponent, charset);
590         }
591 
592 
593         /**
594          * Verify whether a given string is escaped or not
595          *
596          * @param original given characters
597          * @return true if the given character array is 7 bit ASCII-compatible.
598          */
verifyEscaped(char[] original)599         public static boolean verifyEscaped(char[] original) {
600             for (int i = 0; i < original.length; i++) {
601                 int c = original[i];
602                 if (c > 128) {
603                     return false;
604                 } else if (c == '%') {
605                     if (Character.digit(original[++i], 16) == -1
606                         || Character.digit(original[++i], 16) == -1) {
607                         return false;
608                     }
609                 }
610             }
611             return true;
612         }
613 
614 
615         /**
616          * Replace from a given character to given character in an array order
617          * for a given string.
618          *
619          * @param original a given string
620          * @param from a replacing character array
621          * @param to a replaced character array
622          * @return the replaced string
623          */
replace(String original, char[] from, char[] to)624         public static String replace(String original, char[] from, char[] to) {
625             for (int i = from.length; i > 0; --i) {
626                 original = replace(original, from[i], to[i]);
627             }
628             return original;
629         }
630 
631 
632         /**
633          * Replace from a given character to given character for a given string.
634          *
635          * @param original a given string
636          * @param from a replacing character array
637          * @param to a replaced character array
638          * @return the replaced string
639          */
replace(String original, char from, char to)640         public static String replace(String original, char from, char to) {
641             StringBuffer result = new StringBuffer(original.length());
642             int at, saved = 0;
643             do {
644                 at = original.indexOf(from);
645                 if (at >= 0) {
646                     result.append(original.substring(0, at));
647                     result.append(to);
648                 } else {
649                     result.append(original.substring(saved));
650                 }
651                 saved = at;
652             } while (at >= 0);
653             return result.toString();
654         }
655     }
656 
657 }
658 
659