1 /*
2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.nashorn.internal.runtime;
27 
28 import java.util.LinkedList;
29 import java.util.Stack;
30 import java.util.StringTokenizer;
31 
/**
 * A string tokenizer that supports quoted entries, including quotes nested
 * inside other quotes. Delimiters that occur inside a quoted section (quoted
 * with {@code '} or {@code "}, the default quote characters) do not split the
 * input; they stay part of the surrounding token.
 * <p>
 * The complete input is tokenized by the constructor; the accessor methods
 * only hand out the tokens that were collected there.
 */
public final class QuotedStringTokenizer {
    /** Tokens that have not been handed out yet, in input order. */
    private final LinkedList<String> tokens;

    /** Characters that open and close a quoted section. */
    private final char[] quotes;

    /**
     * Create a quoted string tokenizer that splits on the space character.
     *
     * @param str string to tokenize
     *
     * @throws IndexOutOfBoundsException if a quote opened in {@code str} is
     *         still open at the end of the input
     */
    public QuotedStringTokenizer(final String str) {
        this(str, " ");
    }

    /**
     * Create a quoted string tokenizer that accepts {@code "} and {@code '}
     * as quote characters.
     *
     * @param str
     *            a string to tokenize
     * @param delim
     *            delimiters between tokens; every character of this string is
     *            a delimiter in its own right
     *
     * @throws IndexOutOfBoundsException if a quote opened in {@code str} is
     *         still open at the end of the input
     */
    public QuotedStringTokenizer(final String str, final String delim) {
        this(str, delim, new char[] { '"', '\'' });
    }

    /**
     * Create a quoted string tokenizer
     *
     * @param str
     *            a string to tokenize
     * @param delim
     *            delimiters between tokens; every character of this string is
     *            a delimiter in its own right
     * @param quotes
     *            all the characters that should be accepted as quotes, default
     *            is ' or "
     *
     * @throws IndexOutOfBoundsException if a quote opened in {@code str} is
     *         still open at the end of the input; the message is the
     *         unfinished token
     */
    private QuotedStringTokenizer(final String str, final String delim, final char[] quotes) {
        this.quotes = quotes;
        this.tokens = new LinkedList<>();

        final boolean delimIsWhitespace = isAllWhitespace(delim);

        // Delimiters are requested as tokens too, so that the text that really
        // separated two pieces of a quoted section can be put back verbatim.
        final StringTokenizer st = new StringTokenizer(str, delim, true);

        // Token under construction; non-empty only while a quote is still open.
        final StringBuilder pending = new StringBuilder();
        // Delimiters seen since the last piece was appended to 'pending'.
        final StringBuilder gap = new StringBuilder();

        while (st.hasMoreTokens()) {
            final String piece = st.nextToken();

            if (isDelimiter(piece, delim)) {
                // Delimiters only matter inside an open quoted section.
                if (pending.length() > 0) {
                    gap.append(piece);
                }
                continue;
            }

            if (pending.length() > 0) {
                // Re-join the pieces of a quoted section. If every delimiter
                // is whitespace, the separating run is normalised to a single
                // space; otherwise the exact delimiter text is restored.
                if (delimIsWhitespace) {
                    pending.append(' ');
                } else {
                    pending.append(gap);
                }
                gap.setLength(0);
            }
            pending.append(piece);

            if (!unmatchedQuotesIn(pending)) {
                tokens.add(stripQuotes(pending.toString()));
                pending.setLength(0);
            }
        }

        if (pending.length() > 0) {
            // Input ended while a quote was still open.
            throw new IndexOutOfBoundsException(pending.toString());
        }
    }

    /**
     * @return the number of tokens left in the tokenizer
     */
    public int countTokens() {
        return tokens.size();
    }

    /**
     * @return true if there are tokens left
     */
    public boolean hasMoreTokens() {
        return !tokens.isEmpty();
    }

    /**
     * Remove and return the next token.
     *
     * @return the next token in the tokenizer
     *
     * @throws java.util.NoSuchElementException if there are no tokens left
     */
    public String nextToken() {
        return tokens.removeFirst();
    }

    /**
     * Check whether a delimiter set consists of whitespace only. An empty set
     * counts as whitespace-only.
     */
    private static boolean isAllWhitespace(final String delim) {
        for (int i = 0; i < delim.length(); i++) {
            if (!Character.isWhitespace(delim.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check whether a piece returned by the delimiter-returning
     * {@link StringTokenizer} is a delimiter rather than token text: it is
     * exactly one code point long and that code point occurs in {@code delim}.
     */
    private static boolean isDelimiter(final String piece, final String delim) {
        final int cp = piece.codePointAt(0);
        return piece.length() == Character.charCount(cp) && delim.indexOf(cp) >= 0;
    }

    /**
     * Trim a finished token and remove its enclosing quotes. Inside the
     * removed pair, a backslash followed by that quote character is replaced
     * by the plain quote character.
     * <p>
     * The quote characters are tried one after the other in array order, each
     * against the result of the previous step. With the default quotes this
     * means {@code "'a'"} becomes {@code a}, while {@code '"a"'} becomes
     * {@code "a"}. This order dependence is kept unchanged.
     */
    private String stripQuotes(final String value0) {
        String value = value0.trim();
        for (final char q : quotes) {
            final String quote = String.valueOf(q);
            if (value.length() >= 2 && value.startsWith(quote) && value.endsWith(quote)) {
                value = value.substring(1, value.length() - 1);
                // \q sequences inside the quotes are just plain q now
                value = value.replace("\\" + q, quote);
            }
        }
        return value;
    }

    /**
     * Check whether {@code str} ends inside a quoted section. A quote
     * character closes the innermost open quote if it is the same character,
     * otherwise it opens a nested quote. A backslash in front of a quote
     * character is not taken into account here.
     */
    private boolean unmatchedQuotesIn(final CharSequence str) {
        final Stack<Character> open = new Stack<>();
        for (int i = 0; i < str.length(); i++) {
            final char c = str.charAt(i);
            if (!isQuote(c)) {
                continue;
            }
            if (!open.isEmpty() && open.peek() == c) {
                open.pop();
            } else {
                open.push(c);
            }
        }
        return !open.isEmpty();
    }

    /**
     * Check whether {@code c} is one of the configured quote characters.
     */
    private boolean isQuote(final char c) {
        for (final char q : quotes) {
            if (c == q) {
                return true;
            }
        }
        return false;
    }
}
157