1 /*
2  * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.activation.registries;
27 
/**
 * A tokenizer for strings in the form of {@code "foo/bar; prop1=val1; ... "}.
 * Useful for parsing MIME content types and mailcap entries.
 *
 * <p>The input is split into string tokens and the single-character
 * delimiter tokens {@code '/'}, {@code ';'} and {@code '='}. White space
 * (as defined by {@link Character#isWhitespace(char)}) between tokens is
 * skipped. Any other character that cannot be part of a string token is
 * reported as {@link #UNKNOWN_TOKEN}.
 *
 * <p>Instances hold a mutable read position and perform no synchronization.
 */
public class MailcapTokenizer {

    /** A character that is neither a delimiter nor part of a string token. */
    public static final int UNKNOWN_TOKEN = 0;
    /** The state of a new tokenizer before {@link #nextToken()} is called. */
    public static final int START_TOKEN = 1;
    /** A run of string-token characters, or an auto-quoted string. */
    public static final int STRING_TOKEN = 2;
    /** End of input. */
    public static final int EOI_TOKEN = 5;
    /** The {@code '/'} delimiter; the token code is the character itself. */
    public static final int SLASH_TOKEN = '/';
    /** The {@code ';'} delimiter; the token code is the character itself. */
    public static final int SEMICOLON_TOKEN = ';';
    /** The {@code '='} delimiter; the token code is the character itself. */
    public static final int EQUALS_TOKEN = '=';

    /** Character that ends an auto-quoted token unless it is escaped. */
    private static final char AUTOQUOTE_TERMINATOR = ';';

    /** Character that escapes the character following it. */
    private static final char ESCAPE_CHAR = '\\';

    /** Characters that may never appear inside a plain string token. */
    private static final String SPECIAL_CHARS = "()<>@,;:\\\"/[]?=";

    private final String data;
    private final int dataLength;
    private int dataIndex;
    private int currentToken;
    private String currentTokenValue;
    private boolean isAutoquoting;

    /**
     * Creates a tokenizer positioned at the start of the given string.
     * The current token is {@link #START_TOKEN} and the current token
     * value is the empty string until {@link #nextToken()} is called.
     *
     * @param inputString the string to tokenize
     * @throws NullPointerException if {@code inputString} is {@code null}
     */
    public MailcapTokenizer(String inputString) {
        data = inputString;
        dataIndex = 0;
        dataLength = inputString.length();

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
    }

    /**
     * Sets whether auto-quoting is on or off.
     *
     * <p>While auto-quoting is on, {@link #nextToken()} skips leading
     * white space and then, unless the next character is {@code ';'} or
     * {@code '='}, returns everything up to (but not including) the next
     * unescaped {@code ';'} or the end of input as a single
     * {@link #STRING_TOKEN}. White space immediately preceding the
     * terminator is kept as part of the token value.
     *
     * <p>This is required for handling command strings in a mailcap entry.
     *
     * @param value {@code true} to turn auto-quoting on, {@code false} to
     *              turn it off
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     * Retrieves the current token.
     *
     * @return the code of the token most recently produced by
     *         {@link #nextToken()}, or {@link #START_TOKEN} if it has not
     *         been called yet
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     * Gets a String that describes the given token.
     *
     * @param token one of the {@code *_TOKEN} constants
     * @return a short name for the token, or {@code "really unknown"} if
     *         the value is not one of the defined token codes
     */
    public static String nameForToken(int token) {
        switch (token) {
            case UNKNOWN_TOKEN:
                return "unknown";
            case START_TOKEN:
                return "start";
            case STRING_TOKEN:
                return "string";
            case EOI_TOKEN:
                return "EOI";
            case SLASH_TOKEN:
                return "'/'";
            case SEMICOLON_TOKEN:
                return "';'";
            case EQUALS_TOKEN:
                return "'='";
            default:
                return "really unknown";
        }
    }

    /**
     * Retrieves the current token value.
     *
     * @return the text of the current token; the empty string before the
     *         first call to {@link #nextToken()}, and {@code null} once
     *         the end of input has been reached
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     * Processes the next token, updating the current token and its value.
     *
     * @return the code of the token that was read, or {@link #EOI_TOKEN}
     *         when only white space (or nothing) remains
     */
    public int nextToken() {
        //  skip white space
        while ((dataIndex < dataLength)
                && Character.isWhitespace(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        //  examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            //  '/' is deliberately not a delimiter here: commands contain paths
            if ((c == ';') || (c == '=')) {
                processSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if ((c == '/') || (c == ';') || (c == '=')) {
            processSingleCharToken(c, c);
        } else {
            processSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /** Consumes one character and records it as a token with the given code. */
    private void processSingleCharToken(int token, char c) {
        currentToken = token;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /** Consumes the longest run of string-token characters as a STRING_TOKEN. */
    private void processStringToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  skip to 1st non string token character
        while ((dataIndex < dataLength)
                && isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     * Consumes everything up to the first non-escaped auto-quote terminator
     * (or the end of input) as a STRING_TOKEN, with escape sequences
     * resolved in the resulting value.
     */
    private void processAutoquoteToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if ((c == ESCAPE_CHAR) && (dataIndex + 1 < dataLength)) {
                //  an escaped character never terminates the token, so
                //  step over the backslash and the character it protects
                dataIndex += 2;
            } else if (c == AUTOQUOTE_TERMINATOR) {
                break;
            } else {
                ++dataIndex;
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, dataIndex));
    }

    /** Returns true if the character is one of the reserved special characters. */
    private static boolean isSpecialChar(char c) {
        return SPECIAL_CHARS.indexOf(c) >= 0;
    }

    /** Returns true if the character may be part of a plain string token. */
    private static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c)
                && !Character.isISOControl(c)
                && !Character.isWhitespace(c);
    }

    /**
     * Replaces every backslash-escaped pair with the escaped character.
     * A lone backslash at the very end of the input is kept as is.
     */
    private static String fixEscapeSequences(String inputString) {
        int inputLength = inputString.length();
        StringBuilder buffer = new StringBuilder(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if ((currentChar == ESCAPE_CHAR) && (i < inputLength - 1)) {
                //  emit the escaped character and skip over it
                ++i;
                buffer.append(inputString.charAt(i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}
322