1 /*
2  * This file is part of the LibreOffice project.
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  *
8  * This file incorporates work covered by the following license notice:
9  *
10  *   Licensed to the Apache Software Foundation (ASF) under one or more
11  *   contributor license agreements. See the NOTICE file distributed
12  *   with this work for additional information regarding copyright
13  *   ownership. The ASF licenses this file to you under the Apache
14  *   License, Version 2.0 (the "License"); you may not use this file
15  *   except in compliance with the License. You may obtain a copy of
16  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
17  */
18 
19 package ifc.i18n;
20 
21 import lib.MultiMethodTest;
22 
23 import com.sun.star.i18n.KParseTokens;
24 import com.sun.star.i18n.KParseType;
25 import com.sun.star.i18n.ParseResult;
26 import com.sun.star.i18n.XCharacterClassification;
27 import com.sun.star.lang.Locale;
28 
29 /**
30  * Testing <code>com.sun.star.i18n.XCharacterClassification</code>
31  * interface methods:
32  * <ul>
33  *  <li><code> toUpper() </code></li>
34  *  <li><code> toLower() </code></li>
35  *  <li><code> toTitle() </code></li>
36  *  <li><code> getType() </code></li>
37  *  <li><code> getCharacterType() </code></li>
38  *  <li><code> getStringType() </code></li>
39  *  <li><code> getCharacterDirection() </code></li>
40  *  <li><code> getScript() </code></li>
41  *  <li><code> parseAnyToken() </code></li>
42  *  <li><code> parsePredefinedToken() </code></li>
43  * </ul><p>
44  * Test is <b> NOT </b> multithread compliant. <p>
45  * @see com.sun.star.i18n.XCharacterClassification
46  */
47 public class _XCharacterClassification extends MultiMethodTest {
48     public XCharacterClassification oObj = null;
49     public String[] languages = new String[]{"de","en","es","fr","ja","ko","zh"};
50     public String[] countries = new String[]{"DE","US","ES","FR","JP","KR","CN"};
51 
52     public String[] charstyles_office = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
53         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
54         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
55         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
56         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","PRIVATE_USE",
57         "OTHER_PUNCTUATION","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
58         "CONNECTOR_PUNCTUATION",
59         "OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL","MODIFIER_SYMBOL",
60         "OTHER_SYMBOL","INITIAL_PUNCTUATION","FINAL_PUNCTUATION","GENERAL_TYPES_COUNT"};
61 
62     public String[] unicode_script = new String[] {"U_BASIC_LATIN","U_LATIN_1_SUPPLEMENT",
63         "U_LATIN_EXTENDED_A","U_LATIN_EXTENDED_B","U_IPA_EXTENSIONS","U_SPACING_MODIFIER_LETTERS",
64         "U_COMBINING_DIACRITICAL_MARKS","U_GREEK","U_CYRILLIC","U_ARMENIAN","U_HEBREW",
65         "U_ARABIC","U_SYRIAC","U_THAANA","U_DEVANAGARI","U_BENGALI","U_GURMUKHI",
66         "U_GUJARATI","U_ORIYA","U_TAMIL","U_TELUGU","U_KANNADA","U_MALAYALAM",
67         "U_SINHALA","U_THAI","U_LAO","U_TIBETAN","U_MYANMAR","U_GEORGIAN",
68         "U_HANGUL_JAMO","U_ETHIOPIC","U_CHEROKEE","U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
69         "U_OGHAM","U_RUNIC","U_KHMER","U_MONGOLIAN","U_LATIN_EXTENDED_ADDITIONAL",
70         "U_GREEK_EXTENDED","U_GENERAL_PUNCTUATION","U_SUPERSCRIPTS_AND_SUBSCRIPTS",
71         "U_CURRENCY_SYMBOLS","U_COMBINING_MARKS_FOR_SYMBOLS","U_LETTERLIKE_SYMBOLS",
72         "U_NUMBER_FORMS","U_ARROWS","U_MATHEMATICAL_OPERATORS","U_MISCELLANEOUS_TECHNICAL",
73         "U_CONTROL_PICTURES","U_OPTICAL_CHARACTER_RECOGNITION","U_ENCLOSED_ALPHANUMERICS",
74         "U_BOX_DRAWING","U_BLOCK_ELEMENTS","U_GEOMETRIC_SHAPES","U_MISCELLANEOUS_SYMBOLS",
75         "U_DINGBATS","U_BRAILLE_PATTERNS","U_CJK_RADICALS_SUPPLEMENT","U_KANGXI_RADICALS",
76         "U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS","U_CJK_SYMBOLS_AND_PUNCTUATION",
77         "U_HIRAGANA","U_KATAKANA","U_BOPOMOFO","U_HANGUL_COMPATIBILITY_JAMO","U_KANBUN",
78         "U_BOPOMOFO_EXTENDED","U_ENCLOSED_CJK_LETTERS_AND_MONTHS","U_CJK_COMPATIBILITY",
79         "U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A","U_CJK_UNIFIED_IDEOGRAPHS","U_YI_SYLLABLES",
80         "U_YI_RADICALS","U_HANGUL_SYLLABLES","U_HIGH_SURROGATES","U_HIGH_PRIVATE_USE_SURROGATES",
81         "U_LOW_SURROGATES","U_PRIVATE_USE_AREA","U_CJK_COMPATIBILITY_IDEOGRAPHS",
82         "U_ALPHABETIC_PRESENTATION_FORMS","U_ARABIC_PRESENTATION_FORMS_A","U_COMBINING_HALF_MARKS",
83         "U_CJK_COMPATIBILITY_FORMS","U_SMALL_FORM_VARIANTS","U_ARABIC_PRESENTATION_FORMS_B",
84         "U_SPECIALS","U_HALFWIDTH_AND_FULLWIDTH_FORMS","U_CHAR_SCRIPT_COUNT","U_NO_SCRIPT"};
85 
86     /**
87     * Test calls the method for different locales. Then each result is compared
88     * with a string, converted to a upper case using
89     * <code>String</code> method <code>toUpperCase()</code>.<p>
90     * Has <b> OK </b> status if string, returned by the method is equal to
91     * a string that is returned by String.toUpperCase() for all locales.
92     */
_toUpper()93     public void _toUpper() {
94         boolean res = true;
95         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
96         String toCheck = new String(characters);
97         String get = "";
98         String exp = "";
99 
100         for (int i=0;i<7;i++) {
101             get = oObj.toUpper(toCheck, 0, toCheck.length(), getLocale(i));
102             exp = toCheck.toUpperCase(
103                 new java.util.Locale(languages[i], countries[i]));
104             res &= get.equals(exp);
105             if (!res) {
106                 log.println("FAILED for: language=" + languages[i] +
107                     " ; country=" + countries[i]);
108                 log.println("Expected: " + exp);
109                 log.println("Gained : " + get);
110             }
111         }
112         tRes.tested("toUpper()", res);
113     }
114 
115     /**
116     * Test calls the method for different locales. Then each result is compared
117     * with a string, converted to a lower case using
118     * <code>String</code> method <code>toLowerCase()</code>.<p>
119     * Has <b> OK </b> status if string, returned by the method is equal to
120     * a string that is returned by String.toLowerCase() for all locales.
121     */
_toLower()122     public void _toLower() {
123         boolean res = true;
124         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
125         String toCheck = new String(characters);
126         String get = "";
127         String exp = "";
128 
129         for (int i=0;i<7;i++) {
130             get = oObj.toLower(toCheck,0,toCheck.length(),getLocale(i));
131             exp = toCheck.toLowerCase(
132                 new java.util.Locale(languages[i],countries[i]));
133             res &= get.equals(exp);
134             if (!res) {
135                 log.println("FAILED for: language=" + languages[i]
136                     + " ; country=" + countries[i]);
137                 log.println("Expected: " + exp);
138                 log.println("Gained : " + get);
139             }
140         }
141         tRes.tested("toLower()", res);
142     }
143 
144     /**
145     * Test calls the method for different locales. Then each result is compared
146     * with a string, converted to a title case using
147     * <code>java.lang.Character</code> method <code>toTitleCase()</code>.<p>
148     * Has <b> OK </b> status if string, returned by the method is equal to
149     * a string that was converted using Character.toTitleCase() for all locales.
150     */
_toTitle()151     public void _toTitle() {
152         boolean res = true;
153         String toCheck = new String(new char[]{8112});
154         String get = "";
155         String exp = "";
156 
157         for (int i=0;i<7;i++) {
158             get = oObj.toTitle(toCheck, 0, 1, getLocale(i));
159             exp = new String(
160                 new char[]{Character.toTitleCase(toCheck.toCharArray()[0])});
161             res &= get.equals(exp);
162             if (!res) {
163                 log.println("FAILED for: language=" + languages[i]
164                     + " ; country=" + countries[i]);
165                 log.println("Expected: " + exp);
166                 log.println("Gained : " + get);
167             }
168         }
169         tRes.tested("toTitle()", res);
170     }
171 
172     /**
173     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
174     * unicode symbol numbers, arranged as sequences, where symbols are sorted
175     * by type, so the character of <code>i<sup><small>th</small></sup></code>
176     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
177     * Has <b> OK </b> status if for all 30 types the method returns value, that
178     * is equal to an element number.<p>
179     * @see com.sun.star.i18n.CharType
180     */
_getType()181     public void _getType() {
182         boolean res = true;
183         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404,
184             48,8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
185             3647,901,3896,171,187};
186         int[] charsInt = new int[]{586,65,97,498,721,4588,772,8413,3404,48,
187             8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
188             3647,901,3896,171,187};
189         String toCheck = new String(characters);
190 
191         for (int i=0;i<characters.length;i++) {
192             int get = oObj.getType(toCheck, i);
193             res &= charstyles_office[get].equals(charstyles_office[i]);
194             if (!res) {
195                 log.println("Code :" + Integer.toHexString(charsInt[i]));
196                 log.println("Gained: " + charstyles_office[get]);
197                 log.println("Expected : " + charstyles_office[i]);
198             }
199         }
200         tRes.tested("getType()", res);
201     }
202 
203     /**
204     * After defining string to be checked and array of expected types, test
205     * calls the method for each character of a string and for all locales.<p>
206     * Has <b> OK </b> status if the method returns type, expected for a given
207     * character and locale.
208     */
_getCharacterType()209     public void _getCharacterType() {
210         boolean res = true;
211         String toCheck = "Ab0)";
212         int[] expected = new int[]{226,228,97,32};
213 
214         for (int i=0;i<toCheck.length();i++) {
215             for (int j=1;j<7;j++) {
216                 int get = oObj.getCharacterType(toCheck, i, getLocale(j));
217                 res &= (get == expected[i]);
218                 if (!res) {
219                     log.println("FAILED for: language=" + languages[j] +
220                         " ; country=" + countries[j]);
221                     log.println("Symbol :" + toCheck.toCharArray()[i]);
222                     log.println("Gained: " + get);
223                     log.println("Expected : " + expected[i]);
224                 }
225             }
226         }
227         tRes.tested("getCharacterType()", res);
228     }
229 
230     /**
231     * After defining array of strings to be checked and array of expected types,
232     * test calls the method for each string of an array and for all locales.<p>
233     * Has <b> OK </b> status if the method returns type, expected for a given
234     * string and locale.
235     */
_getStringType()236     public void _getStringType() {
237         boolean res = true;
238         String[] toCheck = new String[]{"01234","AAAAA","bbbbb","AA()bb"};
239         int[] exp = new int[]{97,226,228,230};
240 
241         for (int j=0;j<toCheck.length;j++) {
242             for (int i=0;i<7;i++) {
243                 int get = oObj.getStringType(toCheck[j], 0,
244                     toCheck[j].length(), getLocale(i));
245                 res &= (get == exp[j]);
246                 if (!res) {
247                     log.println("FAILED for: language=" + languages[i] +
248                         " ; country=" + countries[i]);
249                     log.println("Expected: " + exp[j]);
250                     log.println("Gained : " + get);
251                 }
252             }
253         }
254         tRes.tested("getStringType()", res);
255     }
256 
257     /**
258     * After string to be checked is initialized (all symbols are sorted
259     * by direction, so the character of <code>i<sup><small>th</small></sup></code>
260     * direction is located on <code>i<sup><small>th</small></sup></code>
261     * position), test calls the method for every character of that string. <p>
262     * Has <b> OK </b> status if the method returns direction, that's equal to
263     * a symbol position in the string.
264     */
_getCharacterDirection()265     public void _getCharacterDirection() {
266         boolean res = true;
267         String toCheck = new String(new char[]{65,1470,48,47,35,1632,44,10,
268                                 9,12,33,8234,8237,1563,8235,8238,8236,768,1});
269         for (short i=0;i<19;i++) {
270             short get = oObj.getCharacterDirection(toCheck, i);
271             res &= (get == i);
272             if (!res) {
273                 log.println("Code :" + toCheck.toCharArray()[i]);
274                 log.println("Gained: " + get);
275                 log.println("Expected: " + i);
276             }
277         }
278         tRes.tested("getCharacterDirection()", res);
279     }
280 
281     /**
282     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
283     * unicode symbol numbers, arranged as sequences, where symbols are sorted
284     * by type, so the character of <code>i<sup><small>th</small></sup></code>
285     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
286     * Has <b> OK </b> status if for each character method returns value, that
287     * is equal to a number where element is located in array. Also method has
288     * <b> OK </b> status for symbol with code 55296, because it doesn't work
289     * since it hasn't the right neighborhood.<p>
290     * @see "http://ppewww.ph.gla.ac.uk/~flavell/unicode/unidata.html"
291     */
_getScript()292     public void _getScript() {
293         boolean res = true;
294         char[] characters = new char[]{65,128,256,384,592,750,773,924,1030,1331,1448,
295             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
296             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
297             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
298             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
299             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
300             64257,64370,65056,65073,65131,65146,65532,65288};
301         int[] charsInt = new int[]{65,128,256,384,592,750,773,924,1030,1331,1448,
302             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
303             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
304             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
305             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
306             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
307             64257,64370,65056,65073,65131,65146,65532,65288};
308         String toCheck = new String(characters);
309 
310         for (int i=0;i<characters.length;i++) {
311             int get = oObj.getScript(toCheck, i);
312             res &= (get == i);
313             //The HIGH_SURROGATE 55296 doesn't work since it hasn't the right
314             //neighborhood
315             if (toCheck.substring(i, i + 1).hashCode() == 55296) res = true;
316             if (!res) {
317                 log.println("-- " + toCheck.substring(i, i + 1).hashCode());
318                 log.println("Code: " + Integer.toHexString(charsInt[i]));
319                 log.println("Gained: " + unicode_script[get]);
320                 log.println("Expected: " + unicode_script[i]);
321             }
322         }
323         tRes.tested("getScript()", res);
324     }
325 
326     /**
327     * After defining a string to be parsed and parse conditions (flags), test
328     * calls the method for different locales three times with different parameters,
329     * checking result after every call.  <p>
330     * Has <b> OK </b> status if the method returns right results all three
331     * times.
332     */
_parseAnyToken()333     public void _parseAnyToken() {
334         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
335         int nContFlags = KParseTokens.ANY_ALNUM | KParseTokens.ASC_UNDERSCORE
336                         | KParseTokens.ASC_DOT;
337         String toCheck = " 18 i18n ^";
338         ParseResult pRes = null;
339         boolean res = true;
340 
341         for (int i=0;i<7;i++) {
342             pRes = oObj.parseAnyToken(toCheck, 1, getLocale(i),
343                 nStartFlags, "", nContFlags, "");
344             res = ( (pRes.CharLen==2)
345                  && (pRes.TokenType==32)
346                  && (pRes.Value==18.0) );
347             pRes = oObj.parseAnyToken(toCheck, 4, getLocale(i),
348                 nStartFlags, "", nContFlags, "");
349             res &= ( (pRes.CharLen==4)
350                   && (pRes.TokenType==4)
351                   && (pRes.Value==0.0) );
352             pRes = oObj.parseAnyToken(toCheck, 9, getLocale(i),
353                 nStartFlags, "", nContFlags, "");
354             res &= ( (pRes.CharLen==1)
355                   && (pRes.TokenType==1)
356                   && (pRes.Value==0.0) );
357         }
358         tRes.tested("parseAnyToken()", res);
359     }
360 
361     /**
362     * After defining a string to be parsed and parse conditions (flags), test
363     * calls the method for different locales two times with different parameters,
364     * checking result after every call. <p>
365     * Has <b> OK </b> status if the method returns right results.
366     */
_parsePredefinedToken()367     public void _parsePredefinedToken() {
368         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
369         int nContFlags = nStartFlags;
370         String toCheck = " 18 int";
371         ParseResult pRes = null;
372         boolean res = true;
373 
374         for (int i=0;i<7;i++) {
375             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
376                 1, getLocale(i), nStartFlags, "", nContFlags, "");
377             res = (pRes.CharLen==0);
378             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
379                 4, getLocale(i), nStartFlags, "", nContFlags, "");
380             res &= ( (pRes.CharLen==3)
381                   && (pRes.TokenType==4)
382                   && (pRes.Value==0.0) );
383         }
384         tRes.tested("parsePredefinedToken()", res);
385     }
386 
387 
388     /**
389     * Method returns locale for a given language and country.
390     * @param k index of needed locale.
391     */
getLocale(int k)392     private Locale getLocale(int k) {
393         return new Locale(languages[k],countries[k],"");
394     }
395 
396 
397 } // end XCharacterClassification
398 
399