/*
 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.net;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Objects;

/**
 * Utility class for HTML form decoding. This class contains static methods
 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
 * MIME format.
 * <p>
 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
 * that all characters in the encoded string are one of the following:
 * "{@code a}" through "{@code z}",
 * "{@code A}" through "{@code Z}",
 * "{@code 0}" through "{@code 9}", and
 * "{@code -}", "{@code _}",
 * "{@code .}", and "{@code *}". The
 * character "{@code %}" is allowed but is interpreted
 * as the start of a special escaped sequence.
 * <p>
 * The following rules are applied in the conversion:
 *
 * <ul>
 * <li>The alphanumeric characters "{@code a}" through
 *     "{@code z}", "{@code A}" through
 *     "{@code Z}" and "{@code 0}"
 *     through "{@code 9}" remain the same.
 * <li>The special characters "{@code .}",
 *     "{@code -}", "{@code *}", and
 *     "{@code _}" remain the same.
 * <li>The plus sign "{@code +}" is converted into a
 *     space character " " .
 * <li>A sequence of the form "<i>{@code %xy}</i>" will be
 *     treated as representing a byte where <i>xy</i> is the two-digit
 *     hexadecimal representation of the 8 bits. Then, all substrings
 *     that contain one or more of these byte sequences consecutively
 *     will be replaced by the character(s) whose encoding would result
 *     in those consecutive bytes.
 *     The encoding scheme used to decode these characters may be specified,
 *     or if unspecified, the default encoding of the platform will be used.
 * </ul>
 * <p>
 * There are two possible ways in which this decoder could deal with
 * illegal strings.  It could either leave illegal characters alone or
 * it could throw an {@link java.lang.IllegalArgumentException}.
 * Which approach the decoder takes is left to the
 * implementation.
 *
 * @author  Mark Chamness
 * @author  Michael McCloskey
 * @since   1.2
 */

public class URLDecoder {

    // The platform default encoding
    static String dfltEncName = URLEncoder.dfltEncName;

    /**
     * Decodes a {@code x-www-form-urlencoded} string.
     * The platform's default encoding is used to determine what characters
     * are represented by any consecutive sequences of the form
     * "<i>{@code %xy}</i>".
     * @param s the {@code String} to decode
     * @deprecated The resulting string may vary depending on the platform's
     *          default encoding. Instead, use the decode(String,String) method
     *          to specify the encoding.
     * @return the newly decoded {@code String}, or {@code null} if the
     *          default encoding name is rejected as unsupported
     */
    @Deprecated
    public static String decode(String s) {

        String str = null;

        try {
            str = decode(s, dfltEncName);
        } catch (UnsupportedEncodingException ignored) {
            // The system should always have the platform default,
            // so this is not expected to happen; null is returned if it does.
        }

        return str;
    }

    /**
     * Decodes an {@code application/x-www-form-urlencoded} string using
     * a specific encoding scheme.
     *
     * <p>
     * This method behaves the same as {@link #decode(String, Charset)}
     * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
     * using the given encoding name.
     *
     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
     * when illegal strings are encountered. The charset is looked up before
     * any decoding takes place.
     *
     * @param s the {@code String} to decode
     * @param enc The name of a supported
     *    <a href="../lang/package-summary.html#charenc">character
     *    encoding</a>.
     * @return the newly decoded {@code String}
     * @throws UnsupportedEncodingException
     *             If {@code enc} is empty, is not a legal charset name, or
     *             names a character encoding that is not supported
     * @see URLEncoder#encode(java.lang.String, java.lang.String)
     * @since 1.4
     */
    public static String decode(String s, String enc) throws UnsupportedEncodingException {
        if (enc.isEmpty()) {
            throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
        }

        Charset charset;
        try {
            charset = Charset.forName(enc);
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            // UnsupportedEncodingException has no cause-taking constructor,
            // so attach the original lookup failure explicitly.
            UnsupportedEncodingException uee = new UnsupportedEncodingException(enc);
            uee.initCause(e);
            throw uee;
        }
        return decode(s, charset);
    }

    /**
     * Decodes an {@code application/x-www-form-urlencoded} string using
     * a specific {@linkplain java.nio.charset.Charset Charset}.
     * The supplied charset is used to determine
     * what characters are represented by any consecutive sequences of the
     * form "<i>{@code %xy}</i>".
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce
     * incompatibilities.</em>
     *
     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
     * when illegal strings are encountered. Each escape must consist of
     * "{@code %}" followed by exactly two ASCII hexadecimal digits
     * ({@code 0-9}, {@code a-f}, {@code A-F}); sign characters and
     * non-ASCII digits are rejected. If the string contains neither
     * "{@code +}" nor "{@code %}", the argument itself is returned.
     *
     * @param s the {@code String} to decode
     * @param charset the given charset
     * @return the newly decoded {@code String}
     * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
     * @throws IllegalArgumentException if an escape contains characters that
     *         are not hexadecimal digits, or if the string ends with an
     *         incomplete escape
     * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
     * @since 10
     */
    public static String decode(String s, Charset charset) {
        Objects.requireNonNull(charset, "Charset");
        boolean needToChange = false;
        int numChars = s.length();
        StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
        int i = 0;

        char c;
        byte[] bytes = null;
        while (i < numChars) {
            c = s.charAt(i);
            switch (c) {
            case '+':
                sb.append(' ');
                i++;
                needToChange = true;
                break;
            case '%': {
                /*
                 * Starting with this instance of %, process all
                 * consecutive substrings of the form %xy. Each
                 * substring %xy will yield a byte. Convert all
                 * consecutive bytes obtained this way to whatever
                 * character(s) they represent in the provided
                 * encoding.
                 */

                // (numChars-i)/3 is an upper bound for the number
                // of remaining bytes, so the buffer allocated at the
                // first escape is large enough for every later run too
                if (bytes == null) {
                    bytes = new byte[(numChars - i) / 3];
                }
                int pos = 0;

                while (((i + 2) < numChars) && (c == '%')) {
                    bytes[pos++] = (byte) hexByte(s, i + 1);
                    i += 3;
                    if (i < numChars) {
                        c = s.charAt(i);
                    }
                }

                // A trailing, incomplete byte encoding such as
                // "%x" will cause an exception to be thrown

                if ((i < numChars) && (c == '%')) {
                    throw new IllegalArgumentException(
                        "URLDecoder: Incomplete trailing escape (%) pattern");
                }

                sb.append(new String(bytes, 0, pos, charset));
                needToChange = true;
                break;
            }
            default:
                sb.append(c);
                i++;
                break;
            }
        }

        return (needToChange ? sb.toString() : s);
    }

    /**
     * Parses the two characters of {@code s} starting at {@code index} as
     * one byte written in ASCII hexadecimal.
     *
     * @param s     the string being decoded
     * @param index index of the first hex digit (the character after "%");
     *              the caller guarantees {@code index + 1 < s.length()}
     * @return the byte value in the range 0..255
     * @throws IllegalArgumentException if either character is not an ASCII
     *         hexadecimal digit
     */
    private static int hexByte(String s, int index) {
        int hi = hexDigit(s.charAt(index));
        int lo = hexDigit(s.charAt(index + 1));
        // hexDigit yields -1 for an illegal character, which makes the OR negative
        if ((hi | lo) < 0) {
            throw new IllegalArgumentException(
                "URLDecoder: Illegal hex characters in escape (%) pattern - \""
                + s.substring(index - 1, index + 2) + "\"");
        }
        return (hi << 4) | lo;
    }

    /**
     * Returns the value of an ASCII hexadecimal digit, or -1 if {@code c}
     * is not one of {@code 0-9}, {@code a-f} or {@code A-F}.
     */
    private static int hexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}