1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright (c) 1997-2017 Oracle and/or its affiliates. All rights reserved. 5 * 6 * The contents of this file are subject to the terms of either the GNU 7 * General Public License Version 2 only ("GPL") or the Common Development 8 * and Distribution License("CDDL") (collectively, the "License"). You 9 * may not use this file except in compliance with the License. You can 10 * obtain a copy of the License at 11 * https://oss.oracle.com/licenses/CDDL+GPL-1.1 12 * or LICENSE.txt. See the License for the specific 13 * language governing permissions and limitations under the License. 14 * 15 * When distributing the software, include this License Header Notice in each 16 * file and include the License file at LICENSE.txt. 17 * 18 * GPL Classpath Exception: 19 * Oracle designates this particular file as subject to the "Classpath" 20 * exception as provided by Oracle in the GPL Version 2 section of the License 21 * file that accompanied this code. 22 * 23 * Modifications: 24 * If applicable, add the following below the License Header, with the fields 25 * enclosed by brackets [] replaced by your own identifying information: 26 * "Portions Copyright [year] [name of copyright owner]" 27 * 28 * Contributor(s): 29 * If you wish your version of this file to be governed by only the CDDL or 30 * only the GPL Version 2, indicate your decision by adding "[Contributor] 31 * elects to include this software in this distribution under the [CDDL or GPL 32 * Version 2] license." If you don't indicate a single choice of license, a 33 * recipient has the option to distribute your version of this file under 34 * either the CDDL, the GPL Version 2 or to extend the choice of license to 35 * its licensees as provided above. However, if you add GPL Version 2 code 36 * and therefore, elected the GPL Version 2 license, then the option applies 37 * only if the new code is made subject to such option by the copyright 38 * holder. 39 */ 40 41 package com.sun.activation.registries; 42 43 /** 44 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ". 45 * Useful for parsing MIME content types. 46 */ 47 public class MailcapTokenizer { 48 49 public static final int UNKNOWN_TOKEN = 0; 50 public static final int START_TOKEN = 1; 51 public static final int STRING_TOKEN = 2; 52 public static final int EOI_TOKEN = 5; 53 public static final int SLASH_TOKEN = '/'; 54 public static final int SEMICOLON_TOKEN = ';'; 55 public static final int EQUALS_TOKEN = '='; 56 57 /** 58 * Constructor 59 * 60 * @param inputString the string to tokenize 61 */ MailcapTokenizer(String inputString)62 public MailcapTokenizer(String inputString) { 63 data = inputString; 64 dataIndex = 0; 65 dataLength = inputString.length(); 66 67 currentToken = START_TOKEN; 68 currentTokenValue = ""; 69 70 isAutoquoting = false; 71 autoquoteChar = ';'; 72 } 73 74 /** 75 * Set whether auto-quoting is on or off. 76 * 77 * Auto-quoting means that all characters after the first 78 * non-whitespace, non-control character up to the auto-quote 79 * terminator character or EOI (minus any whitespace immediatley 80 * preceeding it) is considered a token. 81 * 82 * This is required for handling command strings in a mailcap entry. 83 */ setIsAutoquoting(boolean value)84 public void setIsAutoquoting(boolean value) { 85 isAutoquoting = value; 86 } 87 88 /** 89 * Retrieve current token. 90 * 91 * @return The current token value 92 */ getCurrentToken()93 public int getCurrentToken() { 94 return currentToken; 95 } 96 97 /* 98 * Get a String that describes the given token. 99 */ nameForToken(int token)100 public static String nameForToken(int token) { 101 String name = "really unknown"; 102 103 switch(token) { 104 case UNKNOWN_TOKEN: 105 name = "unknown"; 106 break; 107 case START_TOKEN: 108 name = "start"; 109 break; 110 case STRING_TOKEN: 111 name = "string"; 112 break; 113 case EOI_TOKEN: 114 name = "EOI"; 115 break; 116 case SLASH_TOKEN: 117 name = "'/'"; 118 break; 119 case SEMICOLON_TOKEN: 120 name = "';'"; 121 break; 122 case EQUALS_TOKEN: 123 name = "'='"; 124 break; 125 } 126 127 return name; 128 } 129 130 /* 131 * Retrieve current token value. 132 * 133 * @return A String containing the current token value 134 */ getCurrentTokenValue()135 public String getCurrentTokenValue() { 136 return currentTokenValue; 137 } 138 139 /* 140 * Process the next token. 141 * 142 * @return the next token 143 */ nextToken()144 public int nextToken() { 145 if (dataIndex < dataLength) { 146 // skip white space 147 while ((dataIndex < dataLength) && 148 (isWhiteSpaceChar(data.charAt(dataIndex)))) { 149 ++dataIndex; 150 } 151 152 if (dataIndex < dataLength) { 153 // examine the current character and see what kind of token we have 154 char c = data.charAt(dataIndex); 155 if (isAutoquoting) { 156 if (c == ';' || c == '=') { 157 currentToken = c; 158 currentTokenValue = new Character(c).toString(); 159 ++dataIndex; 160 } else { 161 processAutoquoteToken(); 162 } 163 } else { 164 if (isStringTokenChar(c)) { 165 processStringToken(); 166 } else if ((c == '/') || (c == ';') || (c == '=')) { 167 currentToken = c; 168 currentTokenValue = new Character(c).toString(); 169 ++dataIndex; 170 } else { 171 currentToken = UNKNOWN_TOKEN; 172 currentTokenValue = new Character(c).toString(); 173 ++dataIndex; 174 } 175 } 176 } else { 177 currentToken = EOI_TOKEN; 178 currentTokenValue = null; 179 } 180 } else { 181 currentToken = EOI_TOKEN; 182 currentTokenValue = null; 183 } 184 185 return currentToken; 186 } 187 processStringToken()188 private void processStringToken() { 189 // capture the initial index 190 int initialIndex = dataIndex; 191 192 // skip to 1st non string token character 193 while ((dataIndex < dataLength) && 194 isStringTokenChar(data.charAt(dataIndex))) { 195 ++dataIndex; 196 } 197 198 currentToken = STRING_TOKEN; 199 currentTokenValue = data.substring(initialIndex, dataIndex); 200 } 201 processAutoquoteToken()202 private void processAutoquoteToken() { 203 // capture the initial index 204 int initialIndex = dataIndex; 205 206 // now skip to the 1st non-escaped autoquote termination character 207 // XXX - doesn't actually consider escaping 208 boolean foundTerminator = false; 209 while ((dataIndex < dataLength) && !foundTerminator) { 210 char c = data.charAt(dataIndex); 211 if (c != autoquoteChar) { 212 ++dataIndex; 213 } else { 214 foundTerminator = true; 215 } 216 } 217 218 currentToken = STRING_TOKEN; 219 currentTokenValue = 220 fixEscapeSequences(data.substring(initialIndex, dataIndex)); 221 } 222 isSpecialChar(char c)223 private static boolean isSpecialChar(char c) { 224 boolean lAnswer = false; 225 226 switch(c) { 227 case '(': 228 case ')': 229 case '<': 230 case '>': 231 case '@': 232 case ',': 233 case ';': 234 case ':': 235 case '\\': 236 case '"': 237 case '/': 238 case '[': 239 case ']': 240 case '?': 241 case '=': 242 lAnswer = true; 243 break; 244 } 245 246 return lAnswer; 247 } 248 isControlChar(char c)249 private static boolean isControlChar(char c) { 250 return Character.isISOControl(c); 251 } 252 isWhiteSpaceChar(char c)253 private static boolean isWhiteSpaceChar(char c) { 254 return Character.isWhitespace(c); 255 } 256 isStringTokenChar(char c)257 private static boolean isStringTokenChar(char c) { 258 return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c); 259 } 260 fixEscapeSequences(String inputString)261 private static String fixEscapeSequences(String inputString) { 262 int inputLength = inputString.length(); 263 StringBuffer buffer = new StringBuffer(); 264 buffer.ensureCapacity(inputLength); 265 266 for (int i = 0; i < inputLength; ++i) { 267 char currentChar = inputString.charAt(i); 268 if (currentChar != '\\') { 269 buffer.append(currentChar); 270 } else { 271 if (i < inputLength - 1) { 272 char nextChar = inputString.charAt(i + 1); 273 buffer.append(nextChar); 274 275 // force a skip over the next character too 276 ++i; 277 } else { 278 buffer.append(currentChar); 279 } 280 } 281 } 282 283 return buffer.toString(); 284 } 285 286 private String data; 287 private int dataIndex; 288 private int dataLength; 289 private int currentToken; 290 private String currentTokenValue; 291 private boolean isAutoquoting; 292 private char autoquoteChar; 293 294 /* 295 public static void main(String[] args) { 296 for (int i = 0; i < args.length; ++i) { 297 MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]); 298 299 System.out.println("Original: |" + args[i] + "|"); 300 301 int currentToken = tokenizer.nextToken(); 302 while (currentToken != EOI_TOKEN) { 303 switch(currentToken) { 304 case UNKNOWN_TOKEN: 305 System.out.println(" Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|"); 306 break; 307 case START_TOKEN: 308 System.out.println(" Start Token: |" + tokenizer.getCurrentTokenValue() + "|"); 309 break; 310 case STRING_TOKEN: 311 System.out.println(" String Token: |" + tokenizer.getCurrentTokenValue() + "|"); 312 break; 313 case EOI_TOKEN: 314 System.out.println(" EOI Token: |" + tokenizer.getCurrentTokenValue() + "|"); 315 break; 316 case SLASH_TOKEN: 317 System.out.println(" Slash Token: |" + tokenizer.getCurrentTokenValue() + "|"); 318 break; 319 case SEMICOLON_TOKEN: 320 System.out.println(" Semicolon Token: |" + tokenizer.getCurrentTokenValue() + "|"); 321 break; 322 case EQUALS_TOKEN: 323 System.out.println(" Equals Token: |" + tokenizer.getCurrentTokenValue() + "|"); 324 break; 325 default: 326 System.out.println(" Really Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|"); 327 break; 328 } 329 330 currentToken = tokenizer.nextToken(); 331 } 332 333 System.out.println(""); 334 } 335 } 336 */ 337 } 338