1 /*
2  * Copyright (c) 1996, 2000, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28  * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
29  *
30  *   The original version of this source code and documentation is copyrighted
31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32  * materials are provided under terms of a License Agreement between Taligent
33  * and Sun. This technology is protected by multiple US and International
34  * patents. This notice and attribution to Taligent may not be removed.
35  *   Taligent is a registered trademark of Taligent, Inc.
36  *
37  */
38 
39 package java.text;
40 
41 import java.lang.Character;
42 
43 /**
44  * Utility class for normalizing and merging patterns for collation.
45  * This is to be used with MergeCollation for adding patterns to an
46  * existing rule table.
47  * @see        MergeCollation
48  * @author     Mark Davis, Helena Shih
49  */
50 
class PatternEntry {
    /**
     * Appends this entry's extension to the given buffer, quoted as
     * decided by {@link #appendQuoted}.
     *
     * @param toAddTo the buffer that receives the text
     */
    public void appendQuotedExtension(StringBuffer toAddTo) {
        appendQuoted(extension, toAddTo);
    }

    /**
     * Appends this entry's core characters to the given buffer, quoted as
     * decided by {@link #appendQuoted}.
     *
     * @param toAddTo the buffer that receives the text
     */
    public void appendQuotedChars(StringBuffer toAddTo) {
        appendQuoted(chars, toAddTo);
    }

    /**
     * WARNING this is used for searching in a Vector.
     * Because Vector.indexOf doesn't take a comparator,
     * this method is ill-defined and ignores strength.
     * <p>
     * Only the core characters are compared; strength and extension are
     * ignored.
     *
     * @param obj the object to compare with
     * @return true if {@code obj} is a PatternEntry with equal chars;
     *         false for null or for any other type
     */
    public boolean equals(Object obj) {
        // Reject null and foreign types instead of failing on the cast.
        if (!(obj instanceof PatternEntry)) {
            return false;
        }
        return chars.equals(((PatternEntry) obj).chars);
    }

    /**
     * Returns the hash code of the core characters, which keeps it
     * consistent with {@link #equals}.
     */
    public int hashCode() {
        return chars.hashCode();
    }

    /**
     * For debugging. Renders the strength symbol, the chars and, when
     * present, the extension, without any whitespace.
     */
    public String toString() {
        StringBuffer result = new StringBuffer();
        addToBuffer(result, true, false, null);
        return result.toString();
    }

    /**
     * Gets the strength of the entry.
     */
    final int getStrength() {
        return strength;
    }

    /**
     * Gets the expanding characters of the entry.
     */
    final String getExtension() {
        return extension;
    }

    /**
     * Gets the core characters of the entry.
     */
    final String getChars() {
        return chars;
    }

    // ===== privates =====

    /**
     * Appends the textual form of this entry to a buffer.
     *
     * @param toAddTo        the buffer that receives the text
     * @param showExtension  if true, a non-empty extension is appended
     *                       after a '/'
     * @param showWhiteSpace if true, separating whitespace is inserted
     *                       between the emitted tokens
     * @param lastEntry      if non-null, a "&amp;" reset on that entry's
     *                       chars followed by this entry's extension is
     *                       emitted before the strength symbol
     */
    void addToBuffer(StringBuffer toAddTo,
                     boolean showExtension,
                     boolean showWhiteSpace,
                     PatternEntry lastEntry)
    {
        if (showWhiteSpace && toAddTo.length() > 0) {
            // Primary differences and resets start on a new line; anything
            // else continues the current line.
            if (strength == Collator.PRIMARY || lastEntry != null) {
                toAddTo.append('\n');
            } else {
                toAddTo.append(' ');
            }
        }
        if (lastEntry != null) {
            toAddTo.append('&');
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
            lastEntry.appendQuotedChars(toAddTo);
            appendQuotedExtension(toAddTo);
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
        }
        switch (strength) {
        case Collator.IDENTICAL: toAddTo.append('='); break;
        case Collator.TERTIARY:  toAddTo.append(','); break;
        case Collator.SECONDARY: toAddTo.append(';'); break;
        case Collator.PRIMARY:   toAddTo.append('<'); break;
        case RESET: toAddTo.append('&'); break;
        case UNSET: toAddTo.append('?'); break;
        }
        if (showWhiteSpace) {
            toAddTo.append(' ');
        }
        appendQuoted(chars, toAddTo);
        if (showExtension && extension.length() != 0) {
            toAddTo.append('/');
            appendQuoted(extension, toAddTo);
        }
    }

    /**
     * Appends {@code chars} to the buffer, wrapped in single quotes when
     * its first character requires quoting. Only the first character is
     * inspected. An empty string appends nothing.
     *
     * @param chars   the text to append
     * @param toAddTo the buffer that receives the text
     */
    static void appendQuoted(String chars, StringBuffer toAddTo) {
        if (chars.length() == 0) {
            return; // nothing to emit, and no first character to inspect
        }
        boolean quote = needsQuoting(chars.charAt(0));
        if (quote) {
            toAddTo.append('\'');
        }
        toAddTo.append(chars);
        if (quote) {
            toAddTo.append('\'');
        }
    }

    /**
     * Tells whether text starting with {@code ch} is quoted by
     * {@link #appendQuoted}: space characters, the characters accepted by
     * {@link #isSpecialChar} (which include the single quote), a few
     * control characters, and '@'.
     */
    private static boolean needsQuoting(char ch) {
        if (Character.isSpaceChar(ch) || isSpecialChar(ch)) {
            return true;
        }
        switch (ch) {
        case 0x0010: case '\f': case '\r':
        case '\t': case '\n': case '@':
            return true;
        default:
            return false;
        }
    }

    //========================================================================
    // Parsing a pattern into a list of PatternEntries....
    //========================================================================

    /**
     * Creates an entry from the given strength and buffer contents. The
     * buffers are copied into strings, so they may be reused afterwards.
     *
     * @param strength  a Collator strength constant, {@link #RESET} or
     *                  {@link #UNSET}
     * @param chars     the core characters
     * @param extension the expanding characters; may be empty
     */
    PatternEntry(int strength,
                 StringBuffer chars,
                 StringBuffer extension)
    {
        this.strength = strength;
        this.chars = chars.toString();
        this.extension = (extension.length() > 0) ? extension.toString()
                                                  : "";
    }

    /**
     * Splits a pattern string into successive PatternEntry objects. Each
     * call to {@link #next} resumes where the previous call stopped.
     */
    static class Parser {
        private final String pattern;
        private int i;

        // We re-use these objects in order to improve performance
        private final StringBuffer newChars = new StringBuffer();
        private final StringBuffer newExtension = new StringBuffer();

        /**
         * Creates a parser positioned at the start of {@code pattern}.
         */
        public Parser(String pattern) {
            this.pattern = pattern;
            this.i = 0;
        }

        /**
         * Parses the next entry from the pattern.
         *
         * @return the next entry, or null if the end of the pattern was
         *         reached without encountering a strength operator
         * @throws ParseException if a literal character precedes any
         *         operator, if a special character is not quoted, if a
         *         quote is the last character of the pattern, or if an
         *         operator is not followed by any characters
         */
        public PatternEntry next() throws ParseException {
            int newStrength = UNSET;

            newChars.setLength(0);
            newExtension.setLength(0);

            boolean inChars = true;
            boolean inQuote = false;
            while (i < pattern.length()) {
                char ch = pattern.charAt(i);
                if (inQuote) {
                    if (ch == '\'') {
                        inQuote = false;
                    } else {
                        appendLiteral(ch, inChars);
                    }
                } else {
                    int operator = operatorStrength(ch);
                    if (operator != UNSET) {
                        if (newStrength != UNSET) {
                            // A second operator begins the next entry;
                            // leave i on it for the following call.
                            break;
                        }
                        newStrength = operator;
                    } else {
                        switch (ch) {
                        case '\t':
                        case '\n':
                        case '\f':
                        case '\r':
                        case ' ':
                            break; // skip whitespace TODO use Character
                        case '/':
                            inChars = false;
                            break;
                        case '\'':
                            // The character right after an opening quote is
                            // always taken literally, so it must exist.
                            if (i + 1 >= pattern.length()) {
                                throw new ParseException
                                    ("Unterminated quote at end of pattern", i);
                            }
                            inQuote = true;
                            i++;
                            appendLiteral(pattern.charAt(i), inChars);
                            break;
                        default:
                            if (newStrength == UNSET) {
                                throw new ParseException
                                    ("missing char (=,;<&) : " + snippet(), i);
                            }
                            if (PatternEntry.isSpecialChar(ch)) {
                                throw new ParseException
                                    ("Unquoted punctuation character : "
                                     + Integer.toString(ch, 16), i);
                            }
                            if (inChars) {
                                newChars.append(ch);
                            } else {
                                newExtension.append(ch);
                            }
                            break;
                        }
                    }
                }
                i++;
            }
            if (newStrength == UNSET) {
                return null;
            }
            if (newChars.length() == 0) {
                throw new ParseException
                    ("missing chars (=,;<&): " + snippet(), i);
            }

            return new PatternEntry(newStrength, newChars, newExtension);
        }

        /**
         * Maps an operator character to its strength, or returns UNSET
         * when {@code ch} is not an operator.
         */
        private static int operatorStrength(char ch) {
            switch (ch) {
            case '=': return Collator.IDENTICAL;
            case ',': return Collator.TERTIARY;
            case ';': return Collator.SECONDARY;
            case '<': return Collator.PRIMARY;
            case '&': return RESET;
            default:  return UNSET;
            }
        }

        /**
         * Stores a quoted character. It goes to the chars buffer while
         * chars are being read or while that buffer is still empty, and
         * to the extension buffer otherwise.
         */
        private void appendLiteral(char ch, boolean inChars) {
            if (inChars || newChars.length() == 0) {
                newChars.append(ch);
            } else {
                newExtension.append(ch);
            }
        }

        /**
         * Returns up to ten characters of the pattern starting at the
         * current position, for use in error messages.
         */
        private String snippet() {
            int end = (i + 10 < pattern.length()) ? i + 10 : pattern.length();
            return pattern.substring(i, end);
        }
    }

    /**
     * Tells whether {@code ch} is one of the characters that the parser
     * rejects when unquoted: the space, or a character in one of the
     * ranges U+0022..U+002F, U+003A..U+003F, U+005B..U+0060 and
     * U+007B..U+007E.
     */
    static boolean isSpecialChar(char ch) {
        return ((ch == '\u0020') ||
                ((ch >= '\u0022') && (ch <= '\u002F')) ||
                ((ch >= '\u003A') && (ch <= '\u003F')) ||
                ((ch >= '\u005B') && (ch <= '\u0060')) ||
                ((ch >= '\u007B') && (ch <= '\u007E')));
    }


    /** Strength value written as '&amp;' in a pattern. */
    static final int RESET = -2;
    /** Strength value meaning that no operator has been seen. */
    static final int UNSET = -1;

    int strength = UNSET;
    String chars = "";
    String extension = "";
}
307