1 /*
2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3  *
4  * Copyright (c) 1997-2017 Oracle and/or its affiliates. All rights reserved.
5  *
6  * The contents of this file are subject to the terms of either the GNU
7  * General Public License Version 2 only ("GPL") or the Common Development
8  * and Distribution License("CDDL") (collectively, the "License").  You
9  * may not use this file except in compliance with the License.  You can
10  * obtain a copy of the License at
11  * https://oss.oracle.com/licenses/CDDL+GPL-1.1
12  * or LICENSE.txt.  See the License for the specific
13  * language governing permissions and limitations under the License.
14  *
15  * When distributing the software, include this License Header Notice in each
16  * file and include the License file at LICENSE.txt.
17  *
18  * GPL Classpath Exception:
19  * Oracle designates this particular file as subject to the "Classpath"
20  * exception as provided by Oracle in the GPL Version 2 section of the License
21  * file that accompanied this code.
22  *
23  * Modifications:
24  * If applicable, add the following below the License Header, with the fields
25  * enclosed by brackets [] replaced by your own identifying information:
26  * "Portions Copyright [year] [name of copyright owner]"
27  *
28  * Contributor(s):
29  * If you wish your version of this file to be governed by only the CDDL or
30  * only the GPL Version 2, indicate your decision by adding "[Contributor]
31  * elects to include this software in this distribution under the [CDDL or GPL
32  * Version 2] license."  If you don't indicate a single choice of license, a
33  * recipient has the option to distribute your version of this file under
34  * either the CDDL, the GPL Version 2 or to extend the choice of license to
35  * its licensees as provided above.  However, if you add GPL Version 2 code
36  * and therefore, elected the GPL Version 2 license, then the option applies
37  * only if the new code is made subject to such option by the copyright
38  * holder.
39  */
40 
41 package	com.sun.activation.registries;
42 
/**
 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
 * Useful for parsing MIME content types.
 *
 * <p>Each call to {@link #nextToken()} advances an internal cursor over the
 * input and records the type and text of the token that was read. Instances
 * hold that mutable cursor state and perform no synchronization.
 */
public class MailcapTokenizer {

    /** A single character that cannot start a string token and is not '/', ';' or '='. */
    public static final int UNKNOWN_TOKEN = 0;
    /** The token reported before {@link #nextToken()} has been called. */
    public static final int START_TOKEN = 1;
    /** A run of string-token characters, or an auto-quoted string. */
    public static final int STRING_TOKEN = 2;
    /** End of input. */
    public static final int EOI_TOKEN = 5;
    /** The '/' separator; the token code is the character itself. */
    public static final int SLASH_TOKEN = '/';
    /** The ';' separator; the token code is the character itself. */
    public static final int SEMICOLON_TOKEN = ';';
    /** The '=' separator; the token code is the character itself. */
    public static final int EQUALS_TOKEN = '=';

    private final String data;
    private final int dataLength;
    private final char autoquoteChar;
    private int dataIndex;
    private int currentToken;
    private String currentTokenValue;
    private boolean isAutoquoting;

    /**
     * Creates a tokenizer positioned at the start of the given string, with
     * auto-quoting switched off.
     *
     * @param inputString the string to tokenize
     * @throws NullPointerException if {@code inputString} is {@code null}
     */
    public MailcapTokenizer(String inputString) {
        data = inputString;
        dataIndex = 0;
        dataLength = inputString.length();

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
        autoquoteChar = ';';
    }

    /**
     * Set whether auto-quoting is on or off.
     *
     * <p>While auto-quoting is on, and once leading white space has been
     * skipped, a leading ';' or '=' is still returned as its own token.
     * Anything else starts a string token that runs up to (but not including)
     * the next unescaped ';' or the end of input. A backslash escapes the
     * character that follows it, so an escaped ';' does not end the token;
     * the escaping backslashes are removed from the token value. White space
     * immediately preceding the terminator is kept in the token value.
     *
     * <p>This is required for handling command strings in a mailcap entry.
     *
     * @param value {@code true} to turn auto-quoting on, {@code false} to turn it off
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     * Retrieve current token.
     *
     * @return the type of the most recently read token, or
     *         {@link #START_TOKEN} if no token has been read yet
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     * Get a String that describes the given token.
     *
     * @param token one of the {@code *_TOKEN} constants
     * @return a short name for the token, or "really unknown" if the value
     *         matches none of the constants
     */
    public static String nameForToken(int token) {
        switch (token) {
            case UNKNOWN_TOKEN:
                return "unknown";
            case START_TOKEN:
                return "start";
            case STRING_TOKEN:
                return "string";
            case EOI_TOKEN:
                return "EOI";
            case SLASH_TOKEN:
                return "'/'";
            case SEMICOLON_TOKEN:
                return "';'";
            case EQUALS_TOKEN:
                return "'='";
            default:
                return "really unknown";
        }
    }

    /**
     * Retrieve current token value.
     *
     * @return the text of the most recently read token; the empty string
     *         before the first call to {@link #nextToken()}, and
     *         {@code null} once end of input has been reached
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     * Process the next token.
     *
     * <p>Leading white space is skipped first. At end of input the token is
     * {@link #EOI_TOKEN} and the token value is {@code null}; calling this
     * method again keeps returning {@link #EOI_TOKEN}.
     *
     * @return the type of the token that was read
     */
    public int nextToken() {
        //  skip white space
        while (dataIndex < dataLength
                && isWhiteSpaceChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        //  examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            //  '/' is deliberately not a separator here: it is ordinary
            //  text inside an auto-quoted string
            if (c == ';' || c == '=') {
                processSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if (c == '/' || c == ';' || c == '=') {
            processSingleCharToken(c, c);
        } else {
            processSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /** Records a one-character token of the given type and steps past the character. */
    private void processSingleCharToken(int token, char c) {
        currentToken = token;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /** Reads the longest run of string-token characters starting at the cursor. */
    private void processStringToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  skip to 1st non string token character
        while (dataIndex < dataLength
                && isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     * Reads an auto-quoted string: everything from the cursor up to the first
     * unescaped terminator character or the end of input. The terminator
     * itself is left for the next call to {@link #nextToken()}.
     */
    private void processAutoquoteToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  now skip to the 1st non-escaped autoquote termination character
        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if (c == autoquoteChar) {
                break;
            }
            if (c == '\\' && dataIndex + 1 < dataLength) {
                //  a backslash escapes the next character, so step over
                //  both; this keeps an escaped terminator inside the token
                dataIndex += 2;
            } else {
                ++dataIndex;
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, dataIndex));
    }

    /** Tells whether the character is one of the separators that can never appear in a string token. */
    private static boolean isSpecialChar(char c) {
        switch (c) {
            case '(':
            case ')':
            case '<':
            case '>':
            case '@':
            case ',':
            case ';':
            case ':':
            case '\\':
            case '"':
            case '/':
            case '[':
            case ']':
            case '?':
            case '=':
                return true;
            default:
                return false;
        }
    }

    private static boolean isControlChar(char c) {
        return Character.isISOControl(c);
    }

    private static boolean isWhiteSpaceChar(char c) {
        return Character.isWhitespace(c);
    }

    /** A string-token character is anything that is not special, control or white space. */
    private static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    }

    /**
     * Removes backslash escapes: each backslash is dropped and the character
     * after it is copied verbatim. A backslash that is the very last
     * character has nothing to escape and is kept as is.
     */
    private static String fixEscapeSequences(String inputString) {
        int inputLength = inputString.length();
        StringBuilder buffer = new StringBuilder(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if (currentChar == '\\' && i < inputLength - 1) {
                //  copy the escaped character and skip over it
                ++i;
                buffer.append(inputString.charAt(i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}
338