1 /*
2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /*
25  * @test
26  * @library /java/text/testlib
27  * @summary test Dummy Collation
28  */
29 
30 import java.text.Collator;
31 import java.text.RuleBasedCollator;
32 
33 /*
34 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
35 (C) Copyright IBM Corp. 1996 - All Rights Reserved
36 
37   The original version of this source code and documentation is copyrighted and
38 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
39 provided under terms of a License Agreement between Taligent and Sun. This
40 technology is protected by multiple US and International patents. This notice and
41 attribution to Taligent may not be removed.
42   Taligent is a registered trademark of Taligent, Inc.
43 */
44 
45 public class DummyTest extends CollatorTest {
46 
main(String[] args)47     public static void main(String[] args) throws Exception {
48         new DummyTest().run(args);
49     }
50 
51     private static final String DEFAULTRULES =
52         "='\u200B'=\u200C=\u200D=\u200E=\u200F"
53         // Control Characters
54         + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
55         + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
56         + "='\u000b' =\u000e" //vt,, so
57         + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
58         + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
59         + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
60         + "=\u001e =\u001f =\u007f"                   //rs, us, del
61         //....then the C1 Latin 1 reserved control codes
62         + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
63         + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
64         + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
65         + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
66         + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
67         + "=\u009e =\u009f"
68         // IGNORE except for secondary, tertiary difference
69         // Spaces
70         + ";'\u0020';'\u00A0'"                  // spaces
71         + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'"  // spaces
72         + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'"  // spaces
73         + ";'\u200A';'\u3000';'\uFEFF'"                // spaces
74         + ";'\r' ;'\t' ;'\n';'\f';'\u000b'"  // whitespace
75 
76         // Non-spacing accents
77 
78         + ";\u0301"          // non-spacing acute accent
79         + ";\u0300"          // non-spacing grave accent
80         + ";\u0306"          // non-spacing breve accent
81         + ";\u0302"          // non-spacing circumflex accent
82         + ";\u030c"          // non-spacing caron/hacek accent
83         + ";\u030a"          // non-spacing ring above accent
84         + ";\u030d"          // non-spacing vertical line above
85         + ";\u0308"          // non-spacing diaeresis accent
86         + ";\u030b"          // non-spacing double acute accent
87         + ";\u0303"          // non-spacing tilde accent
88         + ";\u0307"          // non-spacing dot above/overdot accent
89         + ";\u0304"          // non-spacing macron accent
90         + ";\u0337"          // non-spacing short slash overlay (overstruck diacritic)
91         + ";\u0327"          // non-spacing cedilla accent
92         + ";\u0328"          // non-spacing ogonek accent
93         + ";\u0323"          // non-spacing dot-below/underdot accent
94         + ";\u0332"          // non-spacing underscore/underline accent
95         // with the rest of the general diacritical marks in binary order
96         + ";\u0305"          // non-spacing overscore/overline
97         + ";\u0309"          // non-spacing hook above
98         + ";\u030e"          // non-spacing double vertical line above
99         + ";\u030f"          // non-spacing double grave
100         + ";\u0310"          // non-spacing chandrabindu
101         + ";\u0311"          // non-spacing inverted breve
102         + ";\u0312"          // non-spacing turned comma above/cedilla above
103         + ";\u0313"          // non-spacing comma above
104         + ";\u0314"          // non-spacing reversed comma above
105         + ";\u0315"          // non-spacing comma above right
106         + ";\u0316"          // non-spacing grave below
107         + ";\u0317"          // non-spacing acute below
108         + ";\u0318"          // non-spacing left tack below
109         + ";\u0319"          // non-spacing tack below
110         + ";\u031a"          // non-spacing left angle above
111         + ";\u031b"          // non-spacing horn
112         + ";\u031c"          // non-spacing left half ring below
113         + ";\u031d"          // non-spacing up tack below
114         + ";\u031e"          // non-spacing down tack below
115         + ";\u031f"          // non-spacing plus sign below
116         + ";\u0320"          // non-spacing minus sign below
117         + ";\u0321"          // non-spacing palatalized hook below
118         + ";\u0322"          // non-spacing retroflex hook below
119         + ";\u0324"          // non-spacing double dot below
120         + ";\u0325"          // non-spacing ring below
121         + ";\u0326"          // non-spacing comma below
122         + ";\u0329"          // non-spacing vertical line below
123         + ";\u032a"          // non-spacing bridge below
124         + ";\u032b"          // non-spacing inverted double arch below
125         + ";\u032c"          // non-spacing hacek below
126         + ";\u032d"          // non-spacing circumflex below
127         + ";\u032e"          // non-spacing breve below
128         + ";\u032f"          // non-spacing inverted breve below
129         + ";\u0330"          // non-spacing tilde below
130         + ";\u0331"          // non-spacing macron below
131         + ";\u0333"          // non-spacing double underscore
132         + ";\u0334"          // non-spacing tilde overlay
133         + ";\u0335"          // non-spacing short bar overlay
134         + ";\u0336"          // non-spacing long bar overlay
135         + ";\u0338"          // non-spacing long slash overlay
136         + ";\u0339"          // non-spacing right half ring below
137         + ";\u033a"          // non-spacing inverted bridge below
138         + ";\u033b"          // non-spacing square below
139         + ";\u033c"          // non-spacing seagull below
140         + ";\u033d"          // non-spacing x above
141         + ";\u033e"          // non-spacing vertical tilde
142         + ";\u033f"          // non-spacing double overscore
143         + ";\u0340"          // non-spacing grave tone mark
144         + ";\u0341"          // non-spacing acute tone mark
145         + ";\u0342;\u0343;\u0344;\u0345;\u0360;\u0361"    // newer
146         + ";\u0483;\u0484;\u0485;\u0486"    // Cyrillic accents
147 
148         + ";\u20D0;\u20D1;\u20D2"           // symbol accents
149         + ";\u20D3;\u20D4;\u20D5"           // symbol accents
150         + ";\u20D6;\u20D7;\u20D8"           // symbol accents
151         + ";\u20D9;\u20DA;\u20DB"           // symbol accents
152         + ";\u20DC;\u20DD;\u20DE"           // symbol accents
153         + ";\u20DF;\u20E0;\u20E1"           // symbol accents
154 
155         + ",'\u002D';\u00AD"                // dashes
156         + ";\u2010;\u2011;\u2012"           // dashes
157         + ";\u2013;\u2014;\u2015"           // dashes
158         + ";\u2212"                         // dashes
159 
160         // other punctuation
161 
162         + "<'\u005f'"        // underline/underscore (spacing)
163         + "<\u00af"          // overline or macron (spacing)
164 //        + "<\u00ad"        // syllable hyphen (SHY) or soft hyphen
165         + "<'\u002c'"        // comma (spacing)
166         + "<'\u003b'"        // semicolon
167         + "<'\u003a'"        // colon
168         + "<'\u0021'"        // exclamation point
169         + "<\u00a1"          // inverted exclamation point
170         + "<'\u003f'"        // question mark
171         + "<\u00bf"          // inverted question mark
172         + "<'\u002f'"        // slash
173         + "<'\u002e'"        // period/full stop
174         + "<\u00b4"          // acute accent (spacing)
175         + "<'\u0060'"        // grave accent (spacing)
176         + "<'\u005e'"        // circumflex accent (spacing)
177         + "<\u00a8"          // diaresis/umlaut accent (spacing)
178         + "<'\u007e'"        // tilde accent (spacing)
179         + "<\u00b7"          // middle dot (spacing)
180         + "<\u00b8"          // cedilla accent (spacing)
181         + "<'\u0027'"        // apostrophe
182         + "<'\"'"            // quotation marks
183         + "<\u00ab"          // left angle quotes
184         + "<\u00bb"          // right angle quotes
185         + "<'\u0028'"        // left parenthesis
186         + "<'\u0029'"        // right parenthesis
187         + "<'\u005b'"        // left bracket
188         + "<'\u005d'"        // right bracket
189         + "<'\u007b'"        // left brace
190         + "<'\u007d'"        // right brace
191         + "<\u00a7"          // section symbol
192         + "<\u00b6"          // paragraph symbol
193         + "<\u00a9"          // copyright symbol
194         + "<\u00ae"          // registered trademark symbol
195         + "<'\u0040'"          // at sign
196         + "<\u00a4"          // international currency symbol
197         + "<\u00a2"          // cent sign
198         + "<'\u0024'"        // dollar sign
199         + "<\u00a3"          // pound-sterling sign
200         + "<\u00a5"          // yen sign
201         + "<'\u002a'"        // asterisk
202         + "<'\\u005c'"       // backslash
203         + "<'\u0026'"        // ampersand
204         + "<'\u0023'"        // number sign
205         + "<'\u0025'"        // percent sign
206         + "<'\u002b'"        // plus sign
207 //        + "<\u002d"        // hyphen or minus sign
208         + "<\u00b1"          // plus-or-minus sign
209         + "<\u00f7"          // divide sign
210         + "<\u00d7"          // multiply sign
211         + "<'\u003c'"        // less-than sign
212         + "<'\u003d'"        // equal sign
213         + "<'\u003e'"        // greater-than sign
214         + "<\u00ac"          // end of line symbol/logical NOT symbol
215         + "<'\u007c'"          // vertical line/logical OR symbol
216         + "<\u00a6"          // broken vertical line
217         + "<\u00b0"          // degree symbol
218         + "<\u00b5"          // micro symbol
219 
220         // NUMERICS
221 
222         + "<0<1<2<3<4<5<6<7<8<9"
223         + "<\u00bc<\u00bd<\u00be"   // 1/4,1/2,3/4 fractions
224 
225         // NON-IGNORABLES
226         + "<a,A"
227         + "<b,B"
228         + "<c,C"
229         + "<d,D"
230         + "<\u00F0,\u00D0"                  // eth
231         + "<e,E"
232         + "<f,F"
233         + "<g,G"
234         + "<h,H"
235         + "<i,I"
236         + "<j,J"
237         + "<k,K"
238         + "<l,L"
239         + "<m,M"
240         + "<n,N"
241         + "<o,O"
242         + "<p,P"
243         + "<q,Q"
244         + "<r,R"
245         + "<s, S & SS,\u00DF"             // s-zet
246         + "<t,T"
247         + "&th, \u00FE & TH, \u00DE"           // thorn
248         + "<u,U"
249         + "<v,V"
250         + "<w,W"
251         + "<x,X"
252         + "<y,Y"
253         + "<z,Z"
254         + "&AE,\u00C6"                    // ae & AE ligature
255         + "&AE,\u00E6"
256         + "&OE,\u0152"                    // oe & OE ligature
257         + "&OE,\u0153";
258 
259     /*
260      * Data for TestPrimary()
261      */
262     private static final String[] primarySourceData = {
263         "p\u00EAche",
264         "abc",
265         "abc",
266         "abc",
267         "abc",
268         "abc",
269         "a\u00E6c",
270         "acHc",
271         "black"
272     };
273 
274     private static final String[] primaryTargetData = {
275         "p\u00E9ch\u00E9",
276         "abc",
277         "aBC",
278         "abch",
279         "abd",
280         "\u00E4bc",
281         "a\u00C6c",
282         "aCHc",
283         "black-bird"
284     };
285 
286     private static final int[] primaryResults = {
287          0,  0,  0, -1, -1,  0,  0,  0, -1
288     };
289 
290     /*
291      * Data for TestSecondary()
292      */
293     private static final String[] secondarySourceData = {
294         "four",
295         "five",
296         "1",
297         "abc",
298         "abc",
299         "abcH",
300         "abc",
301         "acHc"
302     };
303 
304     private static final String[] secondaryTargetData = {
305 
306         "4",
307         "5",
308         "one",
309         "abc",
310         "aBc",
311         "abch",
312         "abd",
313         "aCHc"
314     };
315 
316     private static final int[] secondaryResults = {
317          0,  0,  0,  0,  0,  0, -1,  0
318     };
319 
320     /*
321      * Data for TestTertiary()
322      */
323     private static final String[] tertiarySourceData = {
324         "ab'c",
325         "co-op",
326         "ab",
327         "ampersad",
328         "all",
329         "four",
330         "five",
331         "1",
332         "1",
333         "1",
334         "2",
335         "2",
336         "Hello",
337         "a<b",
338         "a<b",
339         "acc",
340         "acHc"
341     };
342 
343     private static final String[] tertiaryTargetData = {
344         "abc",
345         "COOP",
346         "abc",
347         "&",
348         "&",
349         "4",
350         "5",
351         "one",
352         "nne",
353         "pne",
354         "two",
355         "uwo",
356         "hellO",
357         "a<=b",
358         "abc",
359         "aCHc",
360         "aCHc"
361     };
362 
363     private static final int[] tertiaryResults = {
364         -1,  1, -1, -1, -1, -1, -1,  1,  1, -1,
365          1, -1,  1,  1, -1, -1, -1
366     };
367 
368 
369     private static final String[] testData = {
370         "a",
371         "A",
372         "\u00e4",
373         "\u00c4",
374         "ae",
375         "aE",
376         "Ae",
377         "AE",
378         "\u00e6",
379         "\u00c6",
380         "b",
381         "c",
382         "z"
383     };
384 
TestPrimary()385     public void TestPrimary() {
386         doTest(getCollator(), Collator.PRIMARY,
387                primarySourceData, primaryTargetData, primaryResults);
388     }
389 
TestSecondary()390     public void TestSecondary() {
391         doTest(getCollator(), Collator.SECONDARY,
392                secondarySourceData, secondaryTargetData, secondaryResults);
393     }
394 
TestTertiary()395     public void TestTertiary() {
396         Collator col = getCollator();
397 
398         doTest(col, Collator.TERTIARY,
399                tertiarySourceData, tertiaryTargetData, tertiaryResults);
400 
401         for (int i = 0; i < testData.length-1; i++) {
402             for (int j = i+1; j < testData.length; j++) {
403                 doTest(col, testData[i], testData[j], -1);
404             }
405         }
406     }
407 
408     private RuleBasedCollator myCollation = null;
getCollator()409     private Collator getCollator() {
410         if (myCollation == null) {
411             try {
412                 myCollation = new RuleBasedCollator
413                     (DEFAULTRULES + "& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ");
414             } catch (Exception foo) {
415                 errln("Collator creation failed.");
416                 myCollation = (RuleBasedCollator)Collator.getInstance();
417             }
418         }
419         return myCollation;
420     }
421 }
422