1 /*
2  * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.net;
27 
28 import java.io.*;
29 import java.nio.charset.Charset;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.UnsupportedCharsetException;
32 import java.util.Objects;
33 
34 /**
35  * Utility class for HTML form decoding. This class contains static methods
36  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
37  * MIME format.
38  * <p>
39  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
40  * that all characters in the encoded string are one of the following:
41  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
42  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
43  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
44  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
45  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
46  * character &quot;{@code %}&quot; is allowed but is interpreted
47  * as the start of a special escaped sequence.
48  * <p>
49  * The following rules are applied in the conversion:
50  *
51  * <ul>
52  * <li>The alphanumeric characters &quot;{@code a}&quot; through
53  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
54  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
55  *     through &quot;{@code 9}&quot; remain the same.
56  * <li>The special characters &quot;{@code .}&quot;,
57  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
58  *     &quot;{@code _}&quot; remain the same.
59  * <li>The plus sign &quot;{@code +}&quot; is converted into a
60  *     space character &quot; &nbsp; &quot; .
61  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
62  *     treated as representing a byte where <i>xy</i> is the two-digit
63  *     hexadecimal representation of the 8 bits. Then, all substrings
64  *     that contain one or more of these byte sequences consecutively
65  *     will be replaced by the character(s) whose encoding would result
66  *     in those consecutive bytes.
67  *     The encoding scheme used to decode these characters may be specified,
68  *     or if unspecified, the default encoding of the platform will be used.
69  * </ul>
70  * <p>
71  * There are two possible ways in which this decoder could deal with
72  * illegal strings.  It could either leave illegal characters alone or
73  * it could throw an {@link java.lang.IllegalArgumentException}.
74  * Which approach the decoder takes is left to the
75  * implementation.
76  *
77  * @author  Mark Chamness
78  * @author  Michael McCloskey
79  * @since   1.2
80  */
81 
82 public class URLDecoder {
83 
84     // The platform default encoding
85     static String dfltEncName = URLEncoder.dfltEncName;
86 
87     /**
88      * Decodes a {@code x-www-form-urlencoded} string.
89      * The platform's default encoding is used to determine what characters
90      * are represented by any consecutive sequences of the form
91      * "<i>{@code %xy}</i>".
92      * @param s the {@code String} to decode
93      * @deprecated The resulting string may vary depending on the platform's
94      *          default encoding. Instead, use the decode(String,String) method
95      *          to specify the encoding.
96      * @return the newly decoded {@code String}
97      */
98     @Deprecated
decode(String s)99     public static String decode(String s) {
100 
101         String str = null;
102 
103         try {
104             str = decode(s, dfltEncName);
105         } catch (UnsupportedEncodingException e) {
106             // The system should always have the platform default
107         }
108 
109         return str;
110     }
111 
112     /**
113      * Decodes an {@code application/x-www-form-urlencoded} string using
114      * a specific encoding scheme.
115      *
116      * <p>
117      * This method behaves the same as {@linkplain decode(String s, Charset charset)}
118      * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
119      * using the given encoding name.
120      *
121      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
122      * when illegal strings are encountered.
123      *
124      * @param s the {@code String} to decode
125      * @param enc   The name of a supported
126      *    <a href="../lang/package-summary.html#charenc">character
127      *    encoding</a>.
128      * @return the newly decoded {@code String}
129      * @throws UnsupportedEncodingException
130      *             If character encoding needs to be consulted, but
131      *             named character encoding is not supported
132      * @see URLEncoder#encode(java.lang.String, java.lang.String)
133      * @since 1.4
134      */
decode(String s, String enc)135     public static String decode(String s, String enc) throws UnsupportedEncodingException {
136         if (enc.isEmpty()) {
137             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
138         }
139 
140         try {
141             Charset charset = Charset.forName(enc);
142             return decode(s, charset);
143         } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
144             throw new UnsupportedEncodingException(enc);
145         }
146     }
147 
148     /**
149      * Decodes an {@code application/x-www-form-urlencoded} string using
150      * a specific {@linkplain java.nio.charset.Charset Charset}.
151      * The supplied charset is used to determine
152      * what characters are represented by any consecutive sequences of the
153      * form "<i>{@code %xy}</i>".
154      * <p>
155      * <em><strong>Note:</strong> The <a href=
156      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
157      * World Wide Web Consortium Recommendation</a> states that
158      * UTF-8 should be used. Not doing so may introduce
159      * incompatibilities.</em>
160      *
161      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
162      * when illegal strings are encountered.
163      *
164      * @param s the {@code String} to decode
165      * @param charset the given charset
166      * @return the newly decoded {@code String}
167      * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
168      * @throws IllegalArgumentException if the implementation encounters illegal
169      * characters
170      * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
171      * @since 10
172      */
decode(String s, Charset charset)173     public static String decode(String s, Charset charset) {
174         Objects.requireNonNull(charset, "Charset");
175         boolean needToChange = false;
176         int numChars = s.length();
177         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
178         int i = 0;
179 
180         char c;
181         byte[] bytes = null;
182         while (i < numChars) {
183             c = s.charAt(i);
184             switch (c) {
185             case '+':
186                 sb.append(' ');
187                 i++;
188                 needToChange = true;
189                 break;
190             case '%':
191                 /*
192                  * Starting with this instance of %, process all
193                  * consecutive substrings of the form %xy. Each
194                  * substring %xy will yield a byte. Convert all
195                  * consecutive  bytes obtained this way to whatever
196                  * character(s) they represent in the provided
197                  * encoding.
198                  */
199 
200                 try {
201 
202                     // (numChars-i)/3 is an upper bound for the number
203                     // of remaining bytes
204                     if (bytes == null)
205                         bytes = new byte[(numChars-i)/3];
206                     int pos = 0;
207 
208                     while ( ((i+2) < numChars) &&
209                             (c=='%')) {
210                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
211                         if (v < 0)
212                             throw new IllegalArgumentException(
213                                     "URLDecoder: Illegal hex characters in escape "
214                                             + "(%) pattern - negative value");
215                         bytes[pos++] = (byte) v;
216                         i+= 3;
217                         if (i < numChars)
218                             c = s.charAt(i);
219                     }
220 
221                     // A trailing, incomplete byte encoding such as
222                     // "%x" will cause an exception to be thrown
223 
224                     if ((i < numChars) && (c=='%'))
225                         throw new IllegalArgumentException(
226                          "URLDecoder: Incomplete trailing escape (%) pattern");
227 
228                     sb.append(new String(bytes, 0, pos, charset));
229                 } catch (NumberFormatException e) {
230                     throw new IllegalArgumentException(
231                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
232                     + e.getMessage());
233                 }
234                 needToChange = true;
235                 break;
236             default:
237                 sb.append(c);
238                 i++;
239                 break;
240             }
241         }
242 
243         return (needToChange? sb.toString() : s);
244     }
245 }
246