1 /*
2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /*
25  * @test 1.1 02/09/11
26  * @bug 4176141 4655819
27  * @summary Regression tests for Japanese Collation
28  * @modules jdk.localedata
29  */
30 
31 import java.text.*;
32 import java.util.*;
33 
/**
 * Regression test for the Japanese collation rules.
 *
 * <p>Each {@code compDataN} table holds rows of
 * {@code {source, target, description}}. The matching {@code resultsN}
 * matrix, indexed as {@code [strength][decomposition]}, gives the sign that
 * {@code Collator.compare(source, target)} is expected to have for every
 * row of that table under the given collator settings.
 */
public class JapaneseTest {

    // NOTE:
    //   Golden data in this test case is locale data dependent and
    //   may need to be changed if the Japanese locale collation rules
    //   are changed.

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     *  SECONDARY         | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     *  TERTIARY(default) | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     */
    static final int[][] results1 = {
        { -1, -1, -1},
        { -1, -1, -1},
        { -1, -1, -1},
    };
    static final String[][] compData1 = {
        /*
         * Data to verify '<' relationship in LocaleElements_ja.java
         */
        {"\u3084", "\u30E6",
         "Hiragana \"YA\"(0x3084) <---> Katakana \"YU\"(0x30E6)"},
        {"\u30E6", "\u3088",
         "Katakana \"YU\"(0x30E6) <---> Hiragana \"YO\"(0x3088)"},
        {"\u00B1", "\u2260",
         "Plus-Minus Sign(0x00B1) <---> Not Equal To(0x2260)"},
        {"\u3011", "\u2260",
         "Right Black Lenticular Bracket(0x3011) <---> Not Equal To(0x2260)"},
        {"\u2260", "\u2103",
         "Not Equal To(0x2260) <---> Degree Celsius(0x2103)"},
        {"\u2260", "\u2606",
         "Not Equal To(0x2260) <---> White Star(0x2606)"},
        {"\u30FD", "\u309E",
         "Katakana Iteration Mark(0x30FD) <---> Hiragana Voiced Iteration Mark(0x309E)"},
        {"\u3059\u309D", "\u3059\u309E",
         "Hiragana \"SU\"(0x3059)Hiragana Iteration Mark(0x309D) <---> Hiragana \"SU\"(0x3059)Hiragana Voiced Iteration Mark(0x309E)"},
        {"\u821E", "\u798F",
         "CJK Unified Ideograph(0x821E) <---> CJK Unified Ideograph(0x798F)"},

        /*
         * Data to verify normalization
         */
        {"\u2260", "\u225F",
         "Not Equal To(0x2260) <---> Questioned Equal To(0x225F)"},
        {"\u226E", "\u2260",
         "Not Less-than(0x226E) <---> Not Equal To(0x2260)"},
        {"\u226E", "\u226D",
         "Not Less-than(0x226E) <---> Not Equivalent To(0x226D)"},
    };

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     *  SECONDARY         | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     *  TERTIARY(default) | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     */
    static final int[][] results2 = {
        {  0,  0,  0},
        { -1, -1, -1},
        { -1, -1, -1},
    };
    static final String[][] compData2 = {
        /*
         * Data to verify ';' relationship in LocaleElements_ja.java
         */
        {"\u3099", "\u309A",
         "Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Combining Katakana-Hiragana Semi-voiced Sound Mark(0x309A)"},
        {"\u3053\u3046\u3068\u3046", "\u3053\u3046\u3068\u3099\u3046",
         "Hiragana \"KOUTOU\"(0x3053 0x3046 0x3068 0x3046) <---> Hiragana \"KOUTO\"(0x3053 0x3046 0x3068)Combining Katakana-Hiragana Voiced Sound Mark(0X3099)\"U\"(0x3046)"},
        {"\u3053\u3046\u3068\u3046", "\u3053\u3046\u3069\u3046",
         "Hiragana \"KOUTOU\"(0x3053 0x3046 0x3068 0x3046) <---> Hiragana \"KOUDOU\"(0x3053 0x3046 0x3069 0x3046)"},
        // The source string contains 0x3069 (DO), so it is labelled "KOUDOU".
        {"\u3053\u3046\u3069\u3046", "\u3054\u3046\u3068\u3046",
         "Hiragana \"KOUDOU\"(0x3053 0x3046 0x3069 0x3046) <---> Hiragana \"GOUTOU\"(0x3054 0x3046 0x3068 0x3046)"},
        {"\u3054\u3046\u3068\u3046", "\u3054\u3046\u3069\u3046",
         "Hiragana \"GOUTOU\"(0x3054 0x3046 0x3068 0x3046) <---> Hiragana \"GOUDOU\"(0x3054 0x3046 0x3069 0x3046)"},
    };

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     *  SECONDARY         | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     *  TERTIARY(default) | s1 < s2 (-1)       | s1 < s2 (-1)     | s1 < s2 (-1)
     */
    static final int[][] results3 = {
        {  0,  0,  0},
        {  0,  0,  0},
        { -1, -1, -1},
    };
    static final String[][] compData3 = {
        /*
         * Data to verify ',' relationship in LocaleElements_ja.java
         */
        {"\u3042", "\u3041",
         "Hiragana \"A\"(0x3042) <---> Hiragana \"a\"(0x3041)"},
        {"\u3041", "\u30A2",
         "Hiragana \"a\"(0x3041) <---> Katakana \"A\"(0x30A2)"},
        {"\u30A2", "\u30A1",
         "Katakana \"A\"(0x30A2) <---> Katakana \"a\"(0x30A1)"},
        {"\u3094", "\u30F4",
         "Hiragana \"VU\"(0x3094) <---> Katakana \"VU\"(0x30F4)"},
        {"\u3094", "\u30A6\u3099",
         "Hiragana \"VU\"(0x3094) <---> Katakana \"U\"(0x30A6)Combining Katakana-Hiragana Voiced Sound Mark(0x3099)"},
        {"\u3046\u3099", "\u30F4",
         "Hiragana \"U\"(0x3046)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VU\"(0x30F4)"},
        {"\u3046\u3099", "\u30A6\u3099",
         "Hiragana \"U\"(0x3046)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"U\"(0x30A6)Combining Katakana-Hiragana Voiced Sound Mark(0x3099)"},
        {"\u30AB\u30A2", "\u30AB\u30FC",
         "Katakana \"KAA\"(0x30AB 0x30A2) <---> Katakana \"KA-\"(0x30AB 0x30FC)"},
        {"\u30CB\u30A1\u30A2", "\u30CB\u30A1\u30FC",
         "Katakana \"NyaA\"(0x30CB 0x30A1 0x30A2) <---> Katakana \"Nya-\"(0x30CB 0x30A1 0x30FC)"},
        {"\u30B3\u30AA\u30D2\u30A4", "\u30B3\u30FC\u30D2\u30FC",
         "Katakana \"KOOHII\"(0x30B3 0x30AA 0x30D2 0x30A4) <---> Katakana \"KO-HI-\"(0x30B3 0x30FC 0x30D2 0x30FC)"},
        {"\u308A\u3088\u3046", "\u308A\u3087\u3046",
         "Hiragana \"RIYOU\"(0x308A 0x3088 0x3046) <---> Hiragana \"Ryou\"(0x308A 0x3087 0x3046)"},
        {"\u3081\u3064\u304D", "\u3081\u3063\u304D",
         "Hiragana \"METSUKI\"(0x3081 0x3064 0x304D) <---> Hiragana \"MEKKI\"(0x3081 0x3063 0x304D)"},
        {"\u3075\u3042\u3093", "\u30D5\u30A1\u30F3",
         "Hiragana \"FUAN\"(0x3075 0x3042 0x3093) <---> Katakana \"FUaN\"(0x30D5 0x30A1 0x30F3)"},
        {"\u3075\u3041\u3093", "\u30D5\u30A2\u30F3",
         "Hiragana \"FUaN\"(0x3075 0x3041 0x3093) <---> Katakana \"FUAN\"(0x30D5 0x30A2 0x30F3)"},
        {"\u30D5\u30A2\u30F3", "\u30D5\u30A1\u30F3",
         "Katakana \"FUAN\"(0x30D5 0x30A2 0x30F3) <---> Katakana \"FUaN\"(0x30D5 0x30A1 0x30F3)"},
    };

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     *  SECONDARY         | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     *  TERTIARY(default) | s1 = s2 (0)        | s1 = s2 (0)      | s1 = s2 (0)
     */
    static final int[][] results4 = {
        {  0,  0,  0},
        {  0,  0,  0},
        {  0,  0,  0},
    };
    static final String[][] compData4 = {
        /*
         * Data to verify Japanese normalization
         */
        {"\u309E", "\u309D\u3099",
         "Hiragana Voiced Iteration Mark(0x309E) <---> Hiragana Iteration Mark(0x309D)Combining Katakana-Hiragana Voiced Sound Mark(0x3099)"},
        {"\u30FE", "\u30FD\u3099",
         "Katakana Voiced Iteration Mark(0x30FE) <---> Katakana iteration mark(0x30FD)Combining Katakana-Hiragana Voiced Sound Mark(0x3099)"},
        {"\u306F\u3099", "\u3070",
         "Hiragana \"HA\"(0x306F)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Hiragana \"BA\"(0x3070)"},
        {"\u306F\u309A", "\u3071",
         "Hiragana \"HA\"(0x306F)Combining Katakana-Hiragana Semi-voiced Sound Mark(0x309A) <---> Hiragana \"PA\"(0x3071)"},
        // The label quotes 0x30EF, the code point actually used in the data.
        {"\u30EF\u3099", "\u30F7",
         "Katakana \"WA\"(0x30EF)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VA\"(0x30F7)"},
        {"\u30F0\u3099", "\u30F8",
         "Katakana \"WI\"(0x30F0)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VI\"(0x30F8)"},
        {"\u30F1\u3099", "\u30F9",
         "Katakana \"WE\"(0x30F1)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VE\"(0x30F9)"},
        {"\u30F2\u3099", "\u30FA",
         "Katakana \"WO\"(0x30F2)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VO\"(0x30FA)"},
        {"\u3046\u3099", "\u3094",
         "Hiragana \"U\"(0x3046)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Hiragana \"VU\"(0x3094)"},
        {"\u30A6\u3099", "\u30F4",
         "Katakana \"U\"(0x30A6)Combining Katakana-Hiragana Voiced Sound Mark(0x3099) <---> Katakana \"VU\"(0x30F4)"},

        // verify normalization
        {"\u2260", "\u003D\u0338",
         "Not Equal To(0x2260) <---> Equal(0x003D)Combining Long Solidus Overlay(0x0338)"},
        {"\u2262", "\u2261\u0338",
         "Not Identical To(0x2262) <---> Identical To(0x2261)Combining Long Solidus Overlay(0x0338)"},
        {"\u226E", "\u003C\u0338",
         "Not Less-than(0x226E) <---> Less-than Sign(0x003C)Combining Long Solidus Overlay(0x0338)"},

        // Verify a character which has been added since Unicode 2.1.X.
        {"\u798F", "\uFA1B",
         "CJK Unified Ideograph \"FUKU\"(0x798F) <---> CJK Compatibility Ideograph \"FUKU\"(0xFA1B)"},
    };

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 > s2 (1)        | s1 = s2 (0)      | s1 = s2 (0)
     *  SECONDARY         | s1 > s2 (1)        | s1 = s2 (0)      | s1 = s2 (0)
     *  TERTIARY(default) | s1 > s2 (1)        | s1 = s2 (0)      | s1 = s2 (0)
     */
    static final int[][] results5 = {
        {  1,  0,  0},
        {  1,  0,  0},
        {  1,  0,  0},
    };
    static final String[][] compData5 = {
        /*
         * Data to verify normalization
         */
        {"\u226D", "\u224D\u0338",
         "Not Equivalent To(0x226D) <---> Equivalent To(0x224D)Combining Long Solidus Overlay(0x0338)"},
    };

    /*
     *                    | NO_DECOMP(default) | CANONICAL_DECOMP | FULL_DECOMP
     * -------------------+--------------------+------------------+-------------
     *  PRIMARY           | s1 > s2 (1)        | s1 < s2 (-1)     | s1 < s2 (-1)
     *  SECONDARY         | s1 > s2 (1)        | s1 < s2 (-1)     | s1 < s2 (-1)
     *  TERTIARY(default) | s1 > s2 (1)        | s1 < s2 (-1)     | s1 < s2 (-1)
     */
    static final int[][] results6 = {
        {  1, -1, -1},
        {  1, -1, -1},
        {  1, -1, -1},
    };
    static final String[][] compData6 = {
        /*
         * Data to verify normalization
         */
        {"\u226D", "\u226C",
         "Not Equivalent To(0x226D) <---> Between(0x226C)"},
        {"\u226D", "\u225F",
         "Not Equivalent To(0x226D) <---> Questioned Equal To(0x225F)"},
    };


    /*
     * The following data isn't used at the moment because iteration marks
     * aren't supported now.
     */
    static final String[][] compData0 = {
        {"\u307F\u307F", "\u307F\u309D",
         "Hiragana \"MIMI\"(0x307F 0x307F) <---> Hiragana \"MI\"(0x307F)Hiragana Iteration Mark(0x309D)"},
        {"\u3044\u3059\u305A", "\u3044\u3059\u309E",
         "Hiragana \"ISUZU\"(0x3044 0x3059 0x305A) <---> Hiragana \"ISU\"(0x3044 0x3059)Hiragana Voiced Iteration Mark(0x309E)"},
        {"\u30DF\u30DF", "\u30DF\u30FD",
         "Katakana \"MIMI\"(0x30DF 0x30DF) <---> Katakana \"MI\"(0x30DF)Katakana Iteration Mark(0x30FD)"},
        {"\u30A4\u30B9\u30BA", "\u30A4\u30B9\u30FE",
         "Katakana \"ISUZU\"(0x30A4 0x30B9 0x30BA) <---> Katakana \"ISU\"(0x30A4 0x30B9)Katakana Voiced Iteration Mark(0x30FE)"},
    };


    /** Display names for the decomposition modes, indexed by the mode value passed to the collator. */
    static final String[] decomp_name = {
        "NO_DECOMP", "CANONICAL_DECOMP", "FULL_DECOMP"
    };

    /** Display names for the strengths, indexed by the strength value passed to the collator. */
    static final String[] strength_name = {
        "PRIMARY", "SECONDARY", "TERTIARY"
    };


    /** Collator under test; its strength and decomposition are reconfigured by {@link #run()}. */
    final Collator col = Collator.getInstance(Locale.JAPAN);

    /** Number of comparisons whose outcome differed from the golden data. */
    int result = 0;

    /**
     * Test entry point: runs every comparison and fails with an exception
     * if any of them is unexpected.
     *
     * @param args ignored
     * @throws Exception if the test fails
     */
    public static void main(String[] args) throws Exception {
        new JapaneseTest().run();
    }

    /**
     * Checks every data table under each combination of decomposition mode
     * and strength.
     *
     * @throws RuntimeException if at least one comparison was unexpected
     */
    public void run() {
        // Use all available locales on the initial testing....
        // Locale[] locales = Locale.getAvailableLocales();
        Locale[] locales = { Locale.getDefault() };

        // Locale.setDefault() changes JVM-wide state, so remember the current
        // default and put it back even if a comparison throws.
        final Locale savedDefault = Locale.getDefault();
        try {
            for (Locale locale : locales) {
                Locale.setDefault(locale);

                // The loop indices double as the Collator mode/strength
                // constants (0, 1, 2) and as indices into the name and
                // result tables.
                for (int decomp = 0; decomp < decomp_name.length; decomp++) {
                    col.setDecomposition(decomp);

                    for (int strength = 0; strength < strength_name.length; strength++) {
                        col.setStrength(strength);
                        doCompare(compData1, results1[strength][decomp], strength, decomp);
                        doCompare(compData2, results2[strength][decomp], strength, decomp);
                        doCompare(compData3, results3[strength][decomp], strength, decomp);
                        doCompare(compData4, results4[strength][decomp], strength, decomp);
                        doCompare(compData5, results5[strength][decomp], strength, decomp);
                        doCompare(compData6, results6[strength][decomp], strength, decomp);
                    }
                }
            }
        } finally {
            Locale.setDefault(savedDefault);
        }

        /* Check result */
        if (result != 0) {
            throw new RuntimeException("Unexpected results on Japanese collation.");
        }
    }

    /**
     * Compares the first two strings of every row with the collator in its
     * current configuration. Each mismatch is reported on {@code System.err}
     * and counted in {@link #result}.
     *
     * @param s             rows of {@code {source, target, description}}
     * @param expectedValue expected sign of the comparison: -1, 0 or 1
     * @param strength      index into {@link #strength_name}, used for reporting
     * @param decomp        index into {@link #decomp_name}, used for reporting
     */
    void doCompare(String[][] s, int expectedValue, int strength, int decomp) {
        for (String[] row : s) {
            int value = col.compare(row[0], row[1]);
            // Collator.compare() only specifies the sign of its result, so
            // compare signs rather than exact magnitudes.
            if (Integer.signum(value) != expectedValue) {
                result++;
                System.err.println(strength_name[strength] +
                                   ": compare() returned unexpected value.(" +
                                   value + ") on " + decomp_name[decomp] +
                                   "     Expected(" + expectedValue +
                                   ") for " + row[2]);
            }
        }
    }
}
326