1 /*
2  * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29  *
30  *   The original version of this source code and documentation is copyrighted
31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32  * materials are provided under terms of a License Agreement between Taligent
33  * and Sun. This technology is protected by multiple US and International
34  * patents. This notice and attribution to Taligent may not be removed.
35  *   Taligent is a registered trademark of Taligent, Inc.
36  *
37  */
38 
39 package java.text;
40 
41 import java.util.Vector;
42 import sun.text.UCompactIntArray;
43 import sun.text.IntHashtable;
44 
45 /**
46  * This class contains the static state of a RuleBasedCollator: The various
47  * tables that are used by the collation routines.  Several RuleBasedCollators
48  * can share a single RBCollationTables object, easing memory requirements and
49  * improving performance.
50  */
51 final class RBCollationTables {
52     //===========================================================================================
53     //  The following diagram shows the data structure of the RBCollationTables object.
54     //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
55     //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
56     //  What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
57     //  sorts 'o-umlaut' as if it's always expanded with 'e'.
58     //
59     // mapping table                     contracting list           expanding list
60     // (contains all unicode char
61     //  entries)                   ___    ____________       _________________________
62     //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
63     // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
64     // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
65     // |____:___|               | |_:_|  |------------|  | |-------------------------|
66     // |____:___|               |        |'cH'|v('cH')|  | |             :           |
67     // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
68     // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
69     // |____:___|               |        |------------|  | |-------------------------|
70     // |____:___|               |        |'CH'|v('CH')|  | |             :           |
71     // |___'c'__|----------------         ------------   | |-------------------------|
72     // |____:___|                                        | |             :           |
73     // |o-umlaut|----------------------------------------  |_________________________|
74     // |____:___|
75     //
76     // Noted by Helena Shih on 6/23/97
77     //============================================================================================
78 
RBCollationTables(String rules, int decmp)79     public RBCollationTables(String rules, int decmp) throws ParseException {
80         this.rules = rules;
81 
82         RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
83         builder.build(rules, decmp); // this object is filled in through
84                                             // the BuildAPI object
85     }
86 
87     final class BuildAPI {
88         /**
89          * Private constructor.  Prevents anyone else besides RBTableBuilder
90          * from gaining direct access to the internals of this class.
91          */
BuildAPI()92         private BuildAPI() {
93         }
94 
95         /**
96          * This function is used by RBTableBuilder to fill in all the members of this
97          * object.  (Effectively, the builder class functions as a "friend" of this
98          * class, but to avoid changing too much of the logic, it carries around "shadow"
99          * copies of all these variables until the end of the build process and then
100          * copies them en masse into the actual tables object once all the construction
101          * logic is complete.  This function does that "copying en masse".
102          * @param f2ary The value for frenchSec (the French-secondary flag)
103          * @param swap The value for SE Asian swapping rule
104          * @param map The collator's character-mapping table (the value for mapping)
105          * @param cTbl The collator's contracting-character table (the value for contractTable)
106          * @param eTbl The collator's expanding-character table (the value for expandTable)
107          * @param cFlgs The hash table of characters that participate in contracting-
108          *              character sequences (the value for contractFlags)
109          * @param mso The value for maxSecOrder
110          * @param mto The value for maxTerOrder
111          */
fillInTables(boolean f2ary, boolean swap, UCompactIntArray map, Vector<Vector<EntryPair>> cTbl, Vector<int[]> eTbl, IntHashtable cFlgs, short mso, short mto)112         void fillInTables(boolean f2ary,
113                           boolean swap,
114                           UCompactIntArray map,
115                           Vector<Vector<EntryPair>> cTbl,
116                           Vector<int[]> eTbl,
117                           IntHashtable cFlgs,
118                           short mso,
119                           short mto) {
120             frenchSec = f2ary;
121             seAsianSwapping = swap;
122             mapping = map;
123             contractTable = cTbl;
124             expandTable = eTbl;
125             contractFlags = cFlgs;
126             maxSecOrder = mso;
127             maxTerOrder = mto;
128         }
129     }
130 
131     /**
132      * Gets the table-based rules for the collation object.
133      * @return returns the collation rules that the table collation object
134      * was created from.
135      */
getRules()136     public String getRules()
137     {
138         return rules;
139     }
140 
isFrenchSec()141     public boolean isFrenchSec() {
142         return frenchSec;
143     }
144 
isSEAsianSwapping()145     public boolean isSEAsianSwapping() {
146         return seAsianSwapping;
147     }
148 
149     // ==============================================================
150     // internal (for use by CollationElementIterator)
151     // ==============================================================
152 
153     /**
154      *  Get the entry of hash table of the contracting string in the collation
155      *  table.
156      *  @param ch the starting character of the contracting string
157      */
getContractValues(int ch)158     Vector<EntryPair> getContractValues(int ch)
159     {
160         int index = mapping.elementAt(ch);
161         return getContractValuesImpl(index - CONTRACTCHARINDEX);
162     }
163 
164     //get contract values from contractTable by index
getContractValuesImpl(int index)165     private Vector<EntryPair> getContractValuesImpl(int index)
166     {
167         if (index >= 0)
168         {
169             return contractTable.elementAt(index);
170         }
171         else // not found
172         {
173             return null;
174         }
175     }
176 
177     /**
178      * Returns true if this character appears anywhere in a contracting
179      * character sequence.  (Used by CollationElementIterator.setOffset().)
180      */
usedInContractSeq(int c)181     boolean usedInContractSeq(int c) {
182         return contractFlags.get(c) == 1;
183     }
184 
185     /**
186       * Return the maximum length of any expansion sequences that end
187       * with the specified comparison order.
188       *
189       * @param order a collation order returned by previous or next.
190       * @return the maximum length of any expansion seuences ending
191       *         with the specified order.
192       *
193       * @see CollationElementIterator#getMaxExpansion
194       */
getMaxExpansion(int order)195     int getMaxExpansion(int order) {
196         int result = 1;
197 
198         if (expandTable != null) {
199             // Right now this does a linear search through the entire
200             // expansion table.  If a collator had a large number of expansions,
201             // this could cause a performance problem, but in practise that
202             // rarely happens
203             for (int i = 0; i < expandTable.size(); i++) {
204                 int[] valueList = expandTable.elementAt(i);
205                 int length = valueList.length;
206 
207                 if (length > result && valueList[length-1] == order) {
208                     result = length;
209                 }
210             }
211         }
212 
213         return result;
214     }
215 
216     /**
217      * Get the entry of hash table of the expanding string in the collation
218      * table.
219      * @param idx the index of the expanding string value list
220      */
getExpandValueList(int idx)221     final int[] getExpandValueList(int idx) {
222         return expandTable.elementAt(idx - EXPANDCHARINDEX);
223     }
224 
225     /**
226      * Get the comarison order of a character from the collation table.
227      * @return the comparison order of a character.
228      */
getUnicodeOrder(int ch)229     int getUnicodeOrder(int ch) {
230         return mapping.elementAt(ch);
231     }
232 
getMaxSecOrder()233     short getMaxSecOrder() {
234         return maxSecOrder;
235     }
236 
getMaxTerOrder()237     short getMaxTerOrder() {
238         return maxTerOrder;
239     }
240 
241     /**
242      * Reverse a string.
243      */
244     //shemran/Note: this is used for secondary order value reverse, no
245     //              need to consider supplementary pair.
reverse(StringBuffer result, int from, int to)246     static void reverse (StringBuffer result, int from, int to)
247     {
248         int i = from;
249         char swap;
250 
251         int j = to - 1;
252         while (i < j) {
253             swap =  result.charAt(i);
254             result.setCharAt(i, result.charAt(j));
255             result.setCharAt(j, swap);
256             i++;
257             j--;
258         }
259     }
260 
getEntry(Vector<EntryPair> list, String name, boolean fwd)261     static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
262         for (int i = 0; i < list.size(); i++) {
263             EntryPair pair = list.elementAt(i);
264             if (pair.fwd == fwd && pair.entryName.equals(name)) {
265                 return i;
266             }
267         }
268         return UNMAPPED;
269     }
270 
271     // ==============================================================
272     // constants
273     // ==============================================================
274     //sherman/Todo: is the value big enough?????
275     static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
276     static final int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
277     static final int UNMAPPED = 0xFFFFFFFF;
278 
279     static final int PRIMARYORDERMASK = 0xffff0000;
280     static final int SECONDARYORDERMASK = 0x0000ff00;
281     static final int TERTIARYORDERMASK = 0x000000ff;
282     static final int PRIMARYDIFFERENCEONLY = 0xffff0000;
283     static final int SECONDARYDIFFERENCEONLY = 0xffffff00;
284     static final int PRIMARYORDERSHIFT = 16;
285     static final int SECONDARYORDERSHIFT = 8;
286 
287     // ==============================================================
288     // instance variables
289     // ==============================================================
290     private String rules = null;
291     private boolean frenchSec = false;
292     private boolean seAsianSwapping = false;
293 
294     private UCompactIntArray mapping = null;
295     private Vector<Vector<EntryPair>> contractTable = null;
296     private Vector<int[]> expandTable = null;
297     private IntHashtable contractFlags = null;
298 
299     private short maxSecOrder = 0;
300     private short maxTerOrder = 0;
301 }
302