1 /* Unit tests for utilities
2  * Copyright (C) 2010 Red Hat, Inc.
3  * Copyright (C) 2011 Google, Inc.
4  *
5  * This work is provided "as is"; redistribution and modification
6  * in whole or in part, in any medium, physical or electronic is
7  * permitted without restriction.
8  *
9  * This work is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  * In no event shall the authors or contributors be liable for any
14  * direct, indirect, incidental, special, exemplary, or consequential
15  * damages (including, but not limited to, procurement of substitute
16  * goods or services; loss of use, data, or profits; or business
17  * interruption) however caused and on any theory of liability, whether
18  * in contract, strict liability, or tort (including negligence or
19  * otherwise) arising in any way out of the use of this software, even
20  * if advised of the possibility of such damage.
21  *
22  * Author: Matthias Clasen, Behdad Esfahbod
23  */
24 
25 /* We are testing some deprecated APIs here */
26 #ifndef GLIB_DISABLE_DEPRECATION_WARNINGS
27 #define GLIB_DISABLE_DEPRECATION_WARNINGS
28 #endif
29 
30 #include <locale.h>
31 
32 #include "glib.h"
33 
34 #include "glib/gunidecomp.h"
35 
/* Test that g_unichar_validate() accepts code points from the BMP and the
 * supplementary planes, and rejects surrogates as well as anything above
 * U+10FFFF. */
static void
test_unichar_validate (void)
{
  /* Accepted code points. */
  g_assert_true (g_unichar_validate ('j'));
  g_assert_true (g_unichar_validate (0x20A4));   /* 8356 */
  g_assert_true (g_unichar_validate (0xFDD1));
  g_assert_true (g_unichar_validate (0xE0100));  /* 917760 */
  g_assert_true (g_unichar_validate (0x10FFFF)); /* highest code point */

  /* The surrogate range U+D800..U+DFFF is rejected; its direct
   * neighbours on either side are accepted. */
  g_assert_true (g_unichar_validate (0xD7FF));
  g_assert_false (g_unichar_validate (0xD800));
  g_assert_false (g_unichar_validate (0xDFFF));
  g_assert_true (g_unichar_validate (0xE000));

  /* First value past the end of the Unicode code space. */
  g_assert_false (g_unichar_validate (0x110000));
}
48 
49 /* Test that g_unichar_type() returns the correct value for various
50  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
51 static void
test_unichar_character_type(void)52 test_unichar_character_type (void)
53 {
54   guint i;
55   struct {
56     GUnicodeType type;
57     gunichar     c;
58   } examples[] = {
59     { G_UNICODE_CONTROL,              0x000D },
60     { G_UNICODE_FORMAT,               0x200E },
61      /* G_UNICODE_UNASSIGNED */
62     { G_UNICODE_PRIVATE_USE,          0xE000 },
63     { G_UNICODE_SURROGATE,            0xD800 },
64     { G_UNICODE_LOWERCASE_LETTER,     0x0061 },
65     { G_UNICODE_MODIFIER_LETTER,      0x02B0 },
66     { G_UNICODE_OTHER_LETTER,         0x3400 },
67     { G_UNICODE_TITLECASE_LETTER,     0x01C5 },
68     { G_UNICODE_UPPERCASE_LETTER,     0xFF21 },
69     { G_UNICODE_SPACING_MARK,         0x0903 },
70     { G_UNICODE_ENCLOSING_MARK,       0x20DD },
71     { G_UNICODE_NON_SPACING_MARK,     0xA806 },
72     { G_UNICODE_DECIMAL_NUMBER,       0xFF10 },
73     { G_UNICODE_LETTER_NUMBER,        0x16EE },
74     { G_UNICODE_OTHER_NUMBER,         0x17F0 },
75     { G_UNICODE_CONNECT_PUNCTUATION,  0x005F },
76     { G_UNICODE_DASH_PUNCTUATION,     0x058A },
77     { G_UNICODE_CLOSE_PUNCTUATION,    0x0F3B },
78     { G_UNICODE_FINAL_PUNCTUATION,    0x2019 },
79     { G_UNICODE_INITIAL_PUNCTUATION,  0x2018 },
80     { G_UNICODE_OTHER_PUNCTUATION,    0x2016 },
81     { G_UNICODE_OPEN_PUNCTUATION,     0x0F3A },
82     { G_UNICODE_CURRENCY_SYMBOL,      0x20A0 },
83     { G_UNICODE_MODIFIER_SYMBOL,      0x309B },
84     { G_UNICODE_MATH_SYMBOL,          0xFB29 },
85     { G_UNICODE_OTHER_SYMBOL,         0x00A6 },
86     { G_UNICODE_LINE_SEPARATOR,       0x2028 },
87     { G_UNICODE_PARAGRAPH_SEPARATOR,  0x2029 },
88     { G_UNICODE_SPACE_SEPARATOR,      0x202F },
89   };
90 
91   for (i = 0; i < G_N_ELEMENTS (examples); i++)
92     {
93       g_assert_cmpint (g_unichar_type (examples[i].c), ==, examples[i].type);
94     }
95 
96   /*** Testing TYPE() border cases ***/
97   g_assert_cmpint (g_unichar_type (0x3FF5), ==, 0x07);
98   /* U+FFEFF Plane 15 Private Use */
99   g_assert_cmpint (g_unichar_type (0xFFEFF), ==, 0x03);
100   /* U+E0001 Language Tag */
101   g_assert_cmpint (g_unichar_type (0xE0001), ==, 0x01);
102   g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR), ==, 0x02);
103   g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR + 1), ==, 0x02);
104   g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1), ==, 0x02);
105   g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1 + 1), ==, 0x02);
106 }
107 
108 /* Test that g_unichar_break_type() returns the correct value for various
109  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
110 static void
test_unichar_break_type(void)111 test_unichar_break_type (void)
112 {
113   guint i;
114   struct {
115     GUnicodeBreakType type;
116     gunichar          c;
117   } examples[] = {
118     { G_UNICODE_BREAK_MANDATORY,           0x2028 },
119     { G_UNICODE_BREAK_CARRIAGE_RETURN,     0x000D },
120     { G_UNICODE_BREAK_LINE_FEED,           0x000A },
121     { G_UNICODE_BREAK_COMBINING_MARK,      0x0300 },
122     { G_UNICODE_BREAK_SURROGATE,           0xD800 },
123     { G_UNICODE_BREAK_ZERO_WIDTH_SPACE,    0x200B },
124     { G_UNICODE_BREAK_INSEPARABLE,         0x2024 },
125     { G_UNICODE_BREAK_NON_BREAKING_GLUE,   0x00A0 },
126     { G_UNICODE_BREAK_CONTINGENT,          0xFFFC },
127     { G_UNICODE_BREAK_SPACE,               0x0020 },
128     { G_UNICODE_BREAK_AFTER,               0x05BE },
129     { G_UNICODE_BREAK_BEFORE,              0x02C8 },
130     { G_UNICODE_BREAK_BEFORE_AND_AFTER,    0x2014 },
131     { G_UNICODE_BREAK_HYPHEN,              0x002D },
132     { G_UNICODE_BREAK_NON_STARTER,         0x17D6 },
133     { G_UNICODE_BREAK_OPEN_PUNCTUATION,    0x0028 },
134     { G_UNICODE_BREAK_CLOSE_PARENTHESIS,   0x0029 },
135     { G_UNICODE_BREAK_CLOSE_PUNCTUATION,   0x007D },
136     { G_UNICODE_BREAK_QUOTATION,           0x0022 },
137     { G_UNICODE_BREAK_EXCLAMATION,         0x0021 },
138     { G_UNICODE_BREAK_IDEOGRAPHIC,         0x2E80 },
139     { G_UNICODE_BREAK_NUMERIC,             0x0030 },
140     { G_UNICODE_BREAK_INFIX_SEPARATOR,     0x002C },
141     { G_UNICODE_BREAK_SYMBOL,              0x002F },
142     { G_UNICODE_BREAK_ALPHABETIC,          0x0023 },
143     { G_UNICODE_BREAK_PREFIX,              0x0024 },
144     { G_UNICODE_BREAK_POSTFIX,             0x0025 },
145     { G_UNICODE_BREAK_COMPLEX_CONTEXT,     0x0E01 },
146     { G_UNICODE_BREAK_AMBIGUOUS,           0x00F7 },
147     { G_UNICODE_BREAK_UNKNOWN,             0xE000 },
148     { G_UNICODE_BREAK_NEXT_LINE,           0x0085 },
149     { G_UNICODE_BREAK_WORD_JOINER,         0x2060 },
150     { G_UNICODE_BREAK_HANGUL_L_JAMO,       0x1100 },
151     { G_UNICODE_BREAK_HANGUL_V_JAMO,       0x1160 },
152     { G_UNICODE_BREAK_HANGUL_T_JAMO,       0x11A8 },
153     { G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,  0xAC00 },
154     { G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 },
155     { G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, 0x3041 },
156     { G_UNICODE_BREAK_HEBREW_LETTER,                0x05D0 },
157     { G_UNICODE_BREAK_REGIONAL_INDICATOR,           0x1F1F6 },
158     { G_UNICODE_BREAK_EMOJI_BASE,          0x1F466 },
159     { G_UNICODE_BREAK_EMOJI_MODIFIER,      0x1F3FB },
160     { G_UNICODE_BREAK_ZERO_WIDTH_JOINER,   0x200D },
161   };
162 
163   for (i = 0; i < G_N_ELEMENTS (examples); i++)
164     {
165       g_assert_cmpint (g_unichar_break_type (examples[i].c), ==, examples[i].type);
166     }
167 }
168 
169 /* Test that g_unichar_get_script() returns the correct value for various
170  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
171 static void
test_unichar_script(void)172 test_unichar_script (void)
173 {
174   guint i;
175   struct {
176     GUnicodeScript script;
177     gunichar          c;
178   } examples[] = {
179     { G_UNICODE_SCRIPT_COMMON,                  0x002A },
180     { G_UNICODE_SCRIPT_INHERITED,               0x1CED },
181     { G_UNICODE_SCRIPT_INHERITED,               0x0670 },
182     { G_UNICODE_SCRIPT_ARABIC,                  0x060D },
183     { G_UNICODE_SCRIPT_ARMENIAN,                0x0559 },
184     { G_UNICODE_SCRIPT_BENGALI,                 0x09CD },
185     { G_UNICODE_SCRIPT_BOPOMOFO,                0x31B6 },
186     { G_UNICODE_SCRIPT_CHEROKEE,                0x13A2 },
187     { G_UNICODE_SCRIPT_COPTIC,                  0x2CFD },
188     { G_UNICODE_SCRIPT_CYRILLIC,                0x0482 },
189     { G_UNICODE_SCRIPT_DESERET,                0x10401 },
190     { G_UNICODE_SCRIPT_DEVANAGARI,              0x094D },
191     { G_UNICODE_SCRIPT_ETHIOPIC,                0x1258 },
192     { G_UNICODE_SCRIPT_GEORGIAN,                0x10FC },
193     { G_UNICODE_SCRIPT_GOTHIC,                 0x10341 },
194     { G_UNICODE_SCRIPT_GREEK,                   0x0375 },
195     { G_UNICODE_SCRIPT_GUJARATI,                0x0A83 },
196     { G_UNICODE_SCRIPT_GURMUKHI,                0x0A3C },
197     { G_UNICODE_SCRIPT_HAN,                     0x3005 },
198     { G_UNICODE_SCRIPT_HANGUL,                  0x1100 },
199     { G_UNICODE_SCRIPT_HEBREW,                  0x05BF },
200     { G_UNICODE_SCRIPT_HIRAGANA,                0x309F },
201     { G_UNICODE_SCRIPT_KANNADA,                 0x0CBC },
202     { G_UNICODE_SCRIPT_KATAKANA,                0x30FF },
203     { G_UNICODE_SCRIPT_KHMER,                   0x17DD },
204     { G_UNICODE_SCRIPT_LAO,                     0x0EDD },
205     { G_UNICODE_SCRIPT_LATIN,                   0x0061 },
206     { G_UNICODE_SCRIPT_MALAYALAM,               0x0D3D },
207     { G_UNICODE_SCRIPT_MONGOLIAN,               0x1843 },
208     { G_UNICODE_SCRIPT_MYANMAR,                 0x1031 },
209     { G_UNICODE_SCRIPT_OGHAM,                   0x169C },
210     { G_UNICODE_SCRIPT_OLD_ITALIC,             0x10322 },
211     { G_UNICODE_SCRIPT_ORIYA,                   0x0B3C },
212     { G_UNICODE_SCRIPT_RUNIC,                   0x16EF },
213     { G_UNICODE_SCRIPT_SINHALA,                 0x0DBD },
214     { G_UNICODE_SCRIPT_SYRIAC,                  0x0711 },
215     { G_UNICODE_SCRIPT_TAMIL,                   0x0B82 },
216     { G_UNICODE_SCRIPT_TELUGU,                  0x0C03 },
217     { G_UNICODE_SCRIPT_THAANA,                  0x07B1 },
218     { G_UNICODE_SCRIPT_THAI,                    0x0E31 },
219     { G_UNICODE_SCRIPT_TIBETAN,                 0x0FD4 },
220     { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,     0x1400 },
221     { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,     0x1401 },
222     { G_UNICODE_SCRIPT_YI,                      0xA015 },
223     { G_UNICODE_SCRIPT_TAGALOG,                 0x1700 },
224     { G_UNICODE_SCRIPT_HANUNOO,                 0x1720 },
225     { G_UNICODE_SCRIPT_BUHID,                   0x1740 },
226     { G_UNICODE_SCRIPT_TAGBANWA,                0x1760 },
227     { G_UNICODE_SCRIPT_BRAILLE,                 0x2800 },
228     { G_UNICODE_SCRIPT_CYPRIOT,                0x10808 },
229     { G_UNICODE_SCRIPT_LIMBU,                   0x1932 },
230     { G_UNICODE_SCRIPT_OSMANYA,                0x10480 },
231     { G_UNICODE_SCRIPT_SHAVIAN,                0x10450 },
232     { G_UNICODE_SCRIPT_LINEAR_B,               0x10000 },
233     { G_UNICODE_SCRIPT_TAI_LE,                  0x1950 },
234     { G_UNICODE_SCRIPT_UGARITIC,               0x1039F },
235     { G_UNICODE_SCRIPT_NEW_TAI_LUE,             0x1980 },
236     { G_UNICODE_SCRIPT_BUGINESE,                0x1A1F },
237     { G_UNICODE_SCRIPT_GLAGOLITIC,              0x2C00 },
238     { G_UNICODE_SCRIPT_TIFINAGH,                0x2D6F },
239     { G_UNICODE_SCRIPT_SYLOTI_NAGRI,            0xA800 },
240     { G_UNICODE_SCRIPT_OLD_PERSIAN,            0x103D0 },
241     { G_UNICODE_SCRIPT_KHAROSHTHI,             0x10A3F },
242     { G_UNICODE_SCRIPT_UNKNOWN,              0x1111111 },
243     { G_UNICODE_SCRIPT_BALINESE,                0x1B04 },
244     { G_UNICODE_SCRIPT_CUNEIFORM,              0x12000 },
245     { G_UNICODE_SCRIPT_PHOENICIAN,             0x10900 },
246     { G_UNICODE_SCRIPT_PHAGS_PA,                0xA840 },
247     { G_UNICODE_SCRIPT_NKO,                     0x07C0 },
248     { G_UNICODE_SCRIPT_KAYAH_LI,                0xA900 },
249     { G_UNICODE_SCRIPT_LEPCHA,                  0x1C00 },
250     { G_UNICODE_SCRIPT_REJANG,                  0xA930 },
251     { G_UNICODE_SCRIPT_SUNDANESE,               0x1B80 },
252     { G_UNICODE_SCRIPT_SAURASHTRA,              0xA880 },
253     { G_UNICODE_SCRIPT_CHAM,                    0xAA00 },
254     { G_UNICODE_SCRIPT_OL_CHIKI,                0x1C50 },
255     { G_UNICODE_SCRIPT_VAI,                     0xA500 },
256     { G_UNICODE_SCRIPT_CARIAN,                 0x102A0 },
257     { G_UNICODE_SCRIPT_LYCIAN,                 0x10280 },
258     { G_UNICODE_SCRIPT_LYDIAN,                 0x1093F },
259     { G_UNICODE_SCRIPT_AVESTAN,                0x10B00 },
260     { G_UNICODE_SCRIPT_BAMUM,                   0xA6A0 },
261     { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,   0x13000 },
262     { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,       0x10840 },
263     { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,  0x10B60 },
264     { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 },
265     { G_UNICODE_SCRIPT_JAVANESE,                0xA980 },
266     { G_UNICODE_SCRIPT_KAITHI,                 0x11082 },
267     { G_UNICODE_SCRIPT_LISU,                    0xA4D0 },
268     { G_UNICODE_SCRIPT_MEETEI_MAYEK,            0xABE5 },
269     { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,      0x10A60 },
270     { G_UNICODE_SCRIPT_OLD_TURKIC,             0x10C00 },
271     { G_UNICODE_SCRIPT_SAMARITAN,               0x0800 },
272     { G_UNICODE_SCRIPT_TAI_THAM,                0x1A20 },
273     { G_UNICODE_SCRIPT_TAI_VIET,                0xAA80 },
274     { G_UNICODE_SCRIPT_BATAK,                   0x1BC0 },
275     { G_UNICODE_SCRIPT_BRAHMI,                 0x11000 },
276     { G_UNICODE_SCRIPT_MANDAIC,                 0x0840 },
277     { G_UNICODE_SCRIPT_CHAKMA,                 0x11100 },
278     { G_UNICODE_SCRIPT_MEROITIC_CURSIVE,       0x109A0 },
279     { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,   0x10980 },
280     { G_UNICODE_SCRIPT_MIAO,                   0x16F00 },
281     { G_UNICODE_SCRIPT_SHARADA,                0x11180 },
282     { G_UNICODE_SCRIPT_SORA_SOMPENG,           0x110D0 },
283     { G_UNICODE_SCRIPT_TAKRI,                  0x11680 },
284     { G_UNICODE_SCRIPT_BASSA_VAH,              0x16AD0 },
285     { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN,     0x10530 },
286     { G_UNICODE_SCRIPT_DUPLOYAN,               0x1BC00 },
287     { G_UNICODE_SCRIPT_ELBASAN,                0x10500 },
288     { G_UNICODE_SCRIPT_GRANTHA,                0x11301 },
289     { G_UNICODE_SCRIPT_KHOJKI,                 0x11200 },
290     { G_UNICODE_SCRIPT_KHUDAWADI,              0x112B0 },
291     { G_UNICODE_SCRIPT_LINEAR_A,               0x10600 },
292     { G_UNICODE_SCRIPT_MAHAJANI,               0x11150 },
293     { G_UNICODE_SCRIPT_MANICHAEAN,             0x10AC0 },
294     { G_UNICODE_SCRIPT_MENDE_KIKAKUI,          0x1E800 },
295     { G_UNICODE_SCRIPT_MODI,                   0x11600 },
296     { G_UNICODE_SCRIPT_MRO,                    0x16A40 },
297     { G_UNICODE_SCRIPT_NABATAEAN,              0x10880 },
298     { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN,      0x10A80 },
299     { G_UNICODE_SCRIPT_OLD_PERMIC,             0x10350 },
300     { G_UNICODE_SCRIPT_PAHAWH_HMONG,           0x16B00 },
301     { G_UNICODE_SCRIPT_PALMYRENE,              0x10860 },
302     { G_UNICODE_SCRIPT_PAU_CIN_HAU,            0x11AC0 },
303     { G_UNICODE_SCRIPT_PSALTER_PAHLAVI,        0x10B80 },
304     { G_UNICODE_SCRIPT_SIDDHAM,                0x11580 },
305     { G_UNICODE_SCRIPT_TIRHUTA,                0x11480 },
306     { G_UNICODE_SCRIPT_WARANG_CITI,            0x118A0 },
307     { G_UNICODE_SCRIPT_CHEROKEE,               0x0AB71 },
308     { G_UNICODE_SCRIPT_HATRAN,                 0x108E0 },
309     { G_UNICODE_SCRIPT_OLD_HUNGARIAN,          0x10C80 },
310     { G_UNICODE_SCRIPT_MULTANI,                0x11280 },
311     { G_UNICODE_SCRIPT_AHOM,                   0x11700 },
312     { G_UNICODE_SCRIPT_CUNEIFORM,              0x12480 },
313     { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS,  0x14400 },
314     { G_UNICODE_SCRIPT_SIGNWRITING,            0x1D800 },
315     { G_UNICODE_SCRIPT_ADLAM,                  0x1E900 },
316     { G_UNICODE_SCRIPT_BHAIKSUKI,              0x11C00 },
317     { G_UNICODE_SCRIPT_MARCHEN,                0x11C70 },
318     { G_UNICODE_SCRIPT_NEWA,                   0x11400 },
319     { G_UNICODE_SCRIPT_OSAGE,                  0x104B0 },
320     { G_UNICODE_SCRIPT_TANGUT,                 0x16FE0 },
321     { G_UNICODE_SCRIPT_MASARAM_GONDI,          0x11D00 },
322     { G_UNICODE_SCRIPT_NUSHU,                  0x1B170 },
323     { G_UNICODE_SCRIPT_SOYOMBO,                0x11A50 },
324     { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE,       0x11A00 },
325     { G_UNICODE_SCRIPT_DOGRA,                  0x11800 },
326     { G_UNICODE_SCRIPT_GUNJALA_GONDI,          0x11D60 },
327     { G_UNICODE_SCRIPT_HANIFI_ROHINGYA,        0x10D00 },
328     { G_UNICODE_SCRIPT_MAKASAR,                0x11EE0 },
329     { G_UNICODE_SCRIPT_MEDEFAIDRIN,            0x16E40 },
330     { G_UNICODE_SCRIPT_OLD_SOGDIAN,            0x10F00 },
331     { G_UNICODE_SCRIPT_SOGDIAN,                0x10F30 },
332     { G_UNICODE_SCRIPT_ELYMAIC,                0x10FE0 },
333     { G_UNICODE_SCRIPT_NANDINAGARI,            0x119A0 },
334     { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, 0x1E100 },
335     { G_UNICODE_SCRIPT_WANCHO,                 0x1E2C0 },
336     { G_UNICODE_SCRIPT_CHORASMIAN,             0x10FB0 },
337     { G_UNICODE_SCRIPT_DIVES_AKURU,            0x11900 },
338     { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT,    0x18B00 },
339     { G_UNICODE_SCRIPT_YEZIDI,                 0x10E80 },
340   };
341   for (i = 0; i < G_N_ELEMENTS (examples); i++)
342     g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script);
343 }
344 
345 /* Test that g_unichar_combining_class() returns the correct value for
346  * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
347 static void
test_combining_class(void)348 test_combining_class (void)
349 {
350   guint i;
351   struct {
352     gint class;
353     gunichar          c;
354   } examples[] = {
355     {   0, 0x0020 },
356     {   1, 0x0334 },
357     {   7, 0x093C },
358     {   8, 0x3099 },
359     {   9, 0x094D },
360     {  10, 0x05B0 },
361     {  11, 0x05B1 },
362     {  12, 0x05B2 },
363     {  13, 0x05B3 },
364     {  14, 0x05B4 },
365     {  15, 0x05B5 },
366     {  16, 0x05B6 },
367     {  17, 0x05B7 },
368     {  18, 0x05B8 },
369     {  19, 0x05B9 },
370     {  20, 0x05BB },
371     {  21, 0x05BC },
372     {  22, 0x05BD },
373     {  23, 0x05BF },
374     {  24, 0x05C1 },
375     {  25, 0x05C2 },
376     {  26, 0xFB1E },
377     {  27, 0x064B },
378     {  28, 0x064C },
379     {  29, 0x064D },
380     /* ... */
381     { 228, 0x05AE },
382     { 230, 0x0300 },
383     { 232, 0x302C },
384     { 233, 0x0362 },
385     { 234, 0x0360 },
386     { 234, 0x1DCD },
387     { 240, 0x0345 }
388   };
389   for (i = 0; i < G_N_ELEMENTS (examples); i++)
390     {
391       g_assert_cmpint (g_unichar_combining_class (examples[i].c), ==, examples[i].class);
392     }
393 }
394 
395 /* Test that g_unichar_get_mirror() returns the correct value for various
396  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
397 static void
test_mirror(void)398 test_mirror (void)
399 {
400   gunichar mirror;
401 
402   g_assert_true (g_unichar_get_mirror_char ('(', &mirror));
403   g_assert_cmpint (mirror, ==, ')');
404   g_assert_true (g_unichar_get_mirror_char (')', &mirror));
405   g_assert_cmpint (mirror, ==, '(');
406   g_assert_true (g_unichar_get_mirror_char ('{', &mirror));
407   g_assert_cmpint (mirror, ==, '}');
408   g_assert_true (g_unichar_get_mirror_char ('}', &mirror));
409   g_assert_cmpint (mirror, ==, '{');
410   g_assert_true (g_unichar_get_mirror_char (0x208D, &mirror));
411   g_assert_cmpint (mirror, ==, 0x208E);
412   g_assert_true (g_unichar_get_mirror_char (0x208E, &mirror));
413   g_assert_cmpint (mirror, ==, 0x208D);
414   g_assert_false (g_unichar_get_mirror_char ('a', &mirror));
415 }
416 
417 /* Test that g_utf8_strup() returns the correct value for various
418  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
419 static void
test_strup(void)420 test_strup (void)
421 {
422   char *str_up = NULL;
423   const char *str = "AaZz09x;\x03\x45"
424     "\xEF\xBD\x81"  /* Unichar 'A' (U+FF21) */
425     "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
426 
427   /* Testing degenerated cases */
428   if (g_test_undefined ())
429     {
430       g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
431                              "*assertion*!= NULL*");
432       str_up = g_utf8_strup (NULL, 0);
433       g_test_assert_expected_messages ();
434     }
435 
436   str_up = g_utf8_strup (str, strlen (str));
437   /* Tricky, comparing two unicode strings with an ASCII function */
438   g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
439   g_free (str_up);
440 }
441 
442 /* Test that g_utf8_strdown() returns the correct value for various
443  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
444 static void
test_strdown(void)445 test_strdown (void)
446 {
447   char *str_down = NULL;
448   const char *str = "AaZz09x;\x03\x07"
449     "\xEF\xBD\x81"  /* Unichar 'A' (U+FF21) */
450     "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
451 
452   /* Testing degenerated cases */
453   if (g_test_undefined ())
454     {
455       g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
456                              "*assertion*!= NULL*");
457       str_down = g_utf8_strdown (NULL, 0);
458       g_test_assert_expected_messages ();
459     }
460 
461   str_down = g_utf8_strdown (str, strlen (str));
462   /* Tricky, comparing two unicode strings with an ASCII function */
463   g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
464   g_free (str_down);
465 }
466 
467 /* Test that g_utf8_strup() and g_utf8_strdown() return the correct
468  * value for Turkish 'i' with and without dot above. */
469 static void
test_turkish_strupdown(void)470 test_turkish_strupdown (void)
471 {
472   char *str_up = NULL;
473   char *str_down = NULL;
474   const char *str = "iII"
475                     "\xcc\x87"  /* COMBINING DOT ABOVE (U+307) */
476                     "\xc4\xb1"  /* LATIN SMALL LETTER DOTLESS I (U+131) */
477                     "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
478 
479   char *oldlocale = g_strdup (setlocale (LC_ALL, "tr_TR"));
480 
481   if (oldlocale == NULL)
482     {
483       g_test_skip ("locale tr_TR not available");
484       return;
485     }
486 
487   str_up = g_utf8_strup (str, strlen (str));
488   str_down = g_utf8_strdown (str, strlen (str));
489   /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
490    * I => I,
491    * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
492    * LATIN SMALL LETTER DOTLESS I => I,
493    * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
494   g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
495   /* i => i,
496    * I => LATIN SMALL LETTER DOTLESS I,
497    * I + COMBINING DOT ABOVE => i,
498    * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
499    * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
500   g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
501   g_free (str_up);
502   g_free (str_down);
503 
504   setlocale (LC_ALL, oldlocale);
505   g_free (oldlocale);
506 }
507 
508 /* Test that g_utf8_casefold() returns the correct value for various
509  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
510 static void
test_casefold(void)511 test_casefold (void)
512 {
513   char *str_casefold = NULL;
514   const char *str = "AaZz09x;"
515     "\xEF\xBD\x81"  /* Unichar 'A' (U+FF21) */
516     "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
517 
518   /* Testing degenerated cases */
519   if (g_test_undefined ())
520     {
521       g_test_expect_message (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
522                              "*assertion*!= NULL*");
523       str_casefold = g_utf8_casefold (NULL, 0);
524       g_test_assert_expected_messages ();
525     }
526 
527   str_casefold = g_utf8_casefold (str, strlen (str));
528   /* Tricky, comparing two unicode strings with an ASCII function */
529   g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
530   g_free (str_casefold);
531 }
532 
533 /* Test that g_unichar_ismark() returns the correct value for various
534  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
535 static void
test_mark(void)536 test_mark (void)
537 {
538   g_assert_true (g_unichar_ismark (0x0903));
539   g_assert_true (g_unichar_ismark (0x20DD));
540   g_assert_true (g_unichar_ismark (0xA806));
541   g_assert_false (g_unichar_ismark ('a'));
542 
543   /*** Testing TYPE() border cases ***/
544   g_assert_false (g_unichar_ismark (0x3FF5));
545   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
546   g_assert_false (g_unichar_ismark (0xFFEFF));
547   /* U+E0001 Language Tag */
548   g_assert_false (g_unichar_ismark (0xE0001));
549   g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR));
550   g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR + 1));
551   g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1));
552   g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1 + 1));
553 }
554 
555 /* Test that g_unichar_isspace() returns the correct value for various
556  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
557 static void
test_space(void)558 test_space (void)
559 {
560   g_assert_false (g_unichar_isspace ('a'));
561   g_assert_true (g_unichar_isspace (' '));
562   g_assert_true (g_unichar_isspace ('\t'));
563   g_assert_true (g_unichar_isspace ('\n'));
564   g_assert_true (g_unichar_isspace ('\r'));
565   g_assert_true (g_unichar_isspace ('\f'));
566   g_assert_false (g_unichar_isspace (0xff41)); /* Unicode fullwidth 'a' */
567   g_assert_true (g_unichar_isspace (0x202F)); /* Unicode space separator */
568   g_assert_true (g_unichar_isspace (0x2028)); /* Unicode line separator */
569   g_assert_true (g_unichar_isspace (0x2029)); /* Unicode paragraph separator */
570 
571   /*** Testing TYPE() border cases ***/
572   g_assert_false (g_unichar_isspace (0x3FF5));
573   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
574   g_assert_false (g_unichar_isspace (0xFFEFF));
575   /* U+E0001 Language Tag */
576   g_assert_false (g_unichar_isspace (0xE0001));
577   g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR));
578   g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR + 1));
579   g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1));
580   g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1 + 1));
581 }
582 
583 /* Test that g_unichar_isalnum() returns the correct value for various
584  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
585 static void
test_alnum(void)586 test_alnum (void)
587 {
588   g_assert_false (g_unichar_isalnum (' '));
589   g_assert_true (g_unichar_isalnum ('a'));
590   g_assert_true (g_unichar_isalnum ('z'));
591   g_assert_true (g_unichar_isalnum ('0'));
592   g_assert_true (g_unichar_isalnum ('9'));
593   g_assert_true (g_unichar_isalnum ('A'));
594   g_assert_true (g_unichar_isalnum ('Z'));
595   g_assert_false (g_unichar_isalnum ('-'));
596   g_assert_false (g_unichar_isalnum ('*'));
597   g_assert_true (g_unichar_isalnum (0xFF21));  /* Unichar fullwidth 'A' */
598   g_assert_true (g_unichar_isalnum (0xFF3A));  /* Unichar fullwidth 'Z' */
599   g_assert_true (g_unichar_isalnum (0xFF41));  /* Unichar fullwidth 'a' */
600   g_assert_true (g_unichar_isalnum (0xFF5A));  /* Unichar fullwidth 'z' */
601   g_assert_true (g_unichar_isalnum (0xFF10));  /* Unichar fullwidth '0' */
602   g_assert_true (g_unichar_isalnum (0xFF19));  /* Unichar fullwidth '9' */
603   g_assert_false (g_unichar_isalnum (0xFF0A)); /* Unichar fullwidth '*' */
604 
605   /*** Testing TYPE() border cases ***/
606   g_assert_true (g_unichar_isalnum (0x3FF5));
607   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
608   g_assert_false (g_unichar_isalnum (0xFFEFF));
609   /* U+E0001 Language Tag */
610   g_assert_false (g_unichar_isalnum (0xE0001));
611   g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR));
612   g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR + 1));
613   g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1));
614   g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1 + 1));
615 }
616 
617 /* Test that g_unichar_isalpha() returns the correct value for various
618  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
619 static void
test_alpha(void)620 test_alpha (void)
621 {
622   g_assert_false (g_unichar_isalpha (' '));
623   g_assert_true (g_unichar_isalpha ('a'));
624   g_assert_true (g_unichar_isalpha ('z'));
625   g_assert_false (g_unichar_isalpha ('0'));
626   g_assert_false (g_unichar_isalpha ('9'));
627   g_assert_true (g_unichar_isalpha ('A'));
628   g_assert_true (g_unichar_isalpha ('Z'));
629   g_assert_false (g_unichar_isalpha ('-'));
630   g_assert_false (g_unichar_isalpha ('*'));
631   g_assert_true (g_unichar_isalpha (0xFF21));  /* Unichar fullwidth 'A' */
632   g_assert_true (g_unichar_isalpha (0xFF3A));  /* Unichar fullwidth 'Z' */
633   g_assert_true (g_unichar_isalpha (0xFF41));  /* Unichar fullwidth 'a' */
634   g_assert_true (g_unichar_isalpha (0xFF5A));  /* Unichar fullwidth 'z' */
635   g_assert_false (g_unichar_isalpha (0xFF10)); /* Unichar fullwidth '0' */
636   g_assert_false (g_unichar_isalpha (0xFF19)); /* Unichar fullwidth '9' */
637   g_assert_false (g_unichar_isalpha (0xFF0A)); /* Unichar fullwidth '*' */
638 
639   /*** Testing TYPE() border cases ***/
640   g_assert_true (g_unichar_isalpha (0x3FF5));
641   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
642   g_assert_false (g_unichar_isalpha (0xFFEFF));
643   /* U+E0001 Language Tag */
644   g_assert_false (g_unichar_isalpha (0xE0001));
645   g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR));
646   g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR + 1));
647   g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1));
648   g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1 + 1));
649 }
650 
651 /* Test that g_unichar_isdigit() returns the correct value for various
652  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
653 static void
test_digit(void)654 test_digit (void)
655 {
656   g_assert_false (g_unichar_isdigit (' '));
657   g_assert_false (g_unichar_isdigit ('a'));
658   g_assert_true (g_unichar_isdigit ('0'));
659   g_assert_true (g_unichar_isdigit ('9'));
660   g_assert_false (g_unichar_isdigit ('A'));
661   g_assert_false (g_unichar_isdigit ('-'));
662   g_assert_false (g_unichar_isdigit ('*'));
663   g_assert_false (g_unichar_isdigit (0xFF21)); /* Unichar fullwidth 'A' */
664   g_assert_false (g_unichar_isdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
665   g_assert_false (g_unichar_isdigit (0xFF41)); /* Unichar fullwidth 'a' */
666   g_assert_false (g_unichar_isdigit (0xFF5A)); /* Unichar fullwidth 'z' */
667   g_assert_true (g_unichar_isdigit (0xFF10));  /* Unichar fullwidth '0' */
668   g_assert_true (g_unichar_isdigit (0xFF19));  /* Unichar fullwidth '9' */
669   g_assert_false (g_unichar_isdigit (0xFF0A)); /* Unichar fullwidth '*' */
670 
671   /*** Testing TYPE() border cases ***/
672   g_assert_false (g_unichar_isdigit (0x3FF5));
673   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
674   g_assert_false (g_unichar_isdigit (0xFFEFF));
675   /* U+E0001 Language Tag */
676   g_assert_false (g_unichar_isdigit (0xE0001));
677   g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR));
678   g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR + 1));
679   g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1));
680   g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
681 }
682 
683 /* Test that g_unichar_digit_value() returns the correct value for various
684  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
685 static void
test_digit_value(void)686 test_digit_value (void)
687 {
688   g_assert_cmpint (g_unichar_digit_value (' '), ==, -1);
689   g_assert_cmpint (g_unichar_digit_value ('a'), ==, -1);
690   g_assert_cmpint (g_unichar_digit_value ('0'), ==, 0);
691   g_assert_cmpint (g_unichar_digit_value ('9'), ==, 9);
692   g_assert_cmpint (g_unichar_digit_value ('A'), ==, -1);
693   g_assert_cmpint (g_unichar_digit_value ('-'), ==, -1);
694   g_assert_cmpint (g_unichar_digit_value (0xFF21), ==, -1); /* Unichar 'A' */
695   g_assert_cmpint (g_unichar_digit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
696   g_assert_cmpint (g_unichar_digit_value (0xFF41), ==, -1); /* Unichar 'a' */
697   g_assert_cmpint (g_unichar_digit_value (0xFF5A), ==, -1); /* Unichar 'z' */
698   g_assert_cmpint (g_unichar_digit_value (0xFF10), ==, 0);  /* Unichar '0' */
699   g_assert_cmpint (g_unichar_digit_value (0xFF19), ==, 9);  /* Unichar '9' */
700   g_assert_cmpint (g_unichar_digit_value (0xFF0A), ==, -1); /* Unichar '*' */
701 
702   /*** Testing TYPE() border cases ***/
703   g_assert_cmpint (g_unichar_digit_value (0x3FF5), ==, -1);
704    /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
705   g_assert_cmpint (g_unichar_digit_value (0xFFEFF), ==, -1);
706   /* U+E0001 Language Tag */
707   g_assert_cmpint (g_unichar_digit_value (0xE0001), ==, -1);
708   g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR), ==, -1);
709   g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
710   g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
711   g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
712 }
713 
714 /* Test that g_unichar_isxdigit() returns the correct value for various
715  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
716 static void
test_xdigit(void)717 test_xdigit (void)
718 {
719   g_assert_false (g_unichar_isxdigit (' '));
720   g_assert_true (g_unichar_isxdigit ('a'));
721   g_assert_true (g_unichar_isxdigit ('f'));
722   g_assert_false (g_unichar_isxdigit ('g'));
723   g_assert_false (g_unichar_isxdigit ('z'));
724   g_assert_true (g_unichar_isxdigit ('0'));
725   g_assert_true (g_unichar_isxdigit ('9'));
726   g_assert_true (g_unichar_isxdigit ('A'));
727   g_assert_true (g_unichar_isxdigit ('F'));
728   g_assert_false (g_unichar_isxdigit ('G'));
729   g_assert_false (g_unichar_isxdigit ('Z'));
730   g_assert_false (g_unichar_isxdigit ('-'));
731   g_assert_false (g_unichar_isxdigit ('*'));
732   g_assert_true (g_unichar_isxdigit (0xFF21));  /* Unichar fullwidth 'A' */
733   g_assert_true (g_unichar_isxdigit (0xFF26));  /* Unichar fullwidth 'F' */
734   g_assert_false (g_unichar_isxdigit (0xFF27)); /* Unichar fullwidth 'G' */
735   g_assert_false (g_unichar_isxdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
736   g_assert_true (g_unichar_isxdigit (0xFF41));  /* Unichar fullwidth 'a' */
737   g_assert_true (g_unichar_isxdigit (0xFF46));  /* Unichar fullwidth 'f' */
738   g_assert_false (g_unichar_isxdigit (0xFF47)); /* Unichar fullwidth 'g' */
739   g_assert_false (g_unichar_isxdigit (0xFF5A)); /* Unichar fullwidth 'z' */
740   g_assert_true (g_unichar_isxdigit (0xFF10));  /* Unichar fullwidth '0' */
741   g_assert_true (g_unichar_isxdigit (0xFF19));  /* Unichar fullwidth '9' */
742   g_assert_false (g_unichar_isxdigit (0xFF0A)); /* Unichar fullwidth '*' */
743 
744   /*** Testing TYPE() border cases ***/
745   g_assert_false (g_unichar_isxdigit (0x3FF5));
746   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
747   g_assert_false (g_unichar_isxdigit (0xFFEFF));
748   /* U+E0001 Language Tag */
749   g_assert_false (g_unichar_isxdigit (0xE0001));
750   g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR));
751   g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR + 1));
752   g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1));
753   g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
754 }
755 
756 /* Test that g_unichar_xdigit_value() returns the correct value for various
757  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
758 static void
test_xdigit_value(void)759 test_xdigit_value (void)
760 {
761   g_assert_cmpint (g_unichar_xdigit_value (' '), ==, -1);
762   g_assert_cmpint (g_unichar_xdigit_value ('a'), ==, 10);
763   g_assert_cmpint (g_unichar_xdigit_value ('f'), ==, 15);
764   g_assert_cmpint (g_unichar_xdigit_value ('g'), ==, -1);
765   g_assert_cmpint (g_unichar_xdigit_value ('0'), ==, 0);
766   g_assert_cmpint (g_unichar_xdigit_value ('9'), ==, 9);
767   g_assert_cmpint (g_unichar_xdigit_value ('A'), ==, 10);
768   g_assert_cmpint (g_unichar_xdigit_value ('F'), ==, 15);
769   g_assert_cmpint (g_unichar_xdigit_value ('G'), ==, -1);
770   g_assert_cmpint (g_unichar_xdigit_value ('-'), ==, -1);
771   g_assert_cmpint (g_unichar_xdigit_value (0xFF21), ==, 10); /* Unichar 'A' */
772   g_assert_cmpint (g_unichar_xdigit_value (0xFF26), ==, 15); /* Unichar 'F' */
773   g_assert_cmpint (g_unichar_xdigit_value (0xFF27), ==, -1); /* Unichar 'G' */
774   g_assert_cmpint (g_unichar_xdigit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
775   g_assert_cmpint (g_unichar_xdigit_value (0xFF41), ==, 10); /* Unichar 'a' */
776   g_assert_cmpint (g_unichar_xdigit_value (0xFF46), ==, 15); /* Unichar 'f' */
777   g_assert_cmpint (g_unichar_xdigit_value (0xFF47), ==, -1); /* Unichar 'g' */
778   g_assert_cmpint (g_unichar_xdigit_value (0xFF5A), ==, -1); /* Unichar 'z' */
779   g_assert_cmpint (g_unichar_xdigit_value (0xFF10), ==, 0);  /* Unichar '0' */
780   g_assert_cmpint (g_unichar_xdigit_value (0xFF19), ==, 9);  /* Unichar '9' */
781   g_assert_cmpint (g_unichar_xdigit_value (0xFF0A), ==, -1); /* Unichar '*' */
782 
783   /*** Testing TYPE() border cases ***/
784   g_assert_cmpint (g_unichar_xdigit_value (0x3FF5), ==, -1);
785    /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
786   g_assert_cmpint (g_unichar_xdigit_value (0xFFEFF), ==, -1);
787   /* U+E0001 Language Tag */
788   g_assert_cmpint (g_unichar_xdigit_value (0xE0001), ==, -1);
789   g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR), ==, -1);
790   g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
791   g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
792   g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
793 }
794 
795 /* Test that g_unichar_ispunct() returns the correct value for various
796  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
797 static void
test_punctuation(void)798 test_punctuation (void)
799 {
800   g_assert_false (g_unichar_ispunct (' '));
801   g_assert_false (g_unichar_ispunct ('a'));
802   g_assert_true (g_unichar_ispunct ('.'));
803   g_assert_true (g_unichar_ispunct (','));
804   g_assert_true (g_unichar_ispunct (';'));
805   g_assert_true (g_unichar_ispunct (':'));
806   g_assert_true (g_unichar_ispunct ('-'));
807 
808   g_assert_false (g_unichar_ispunct (0xFF21)); /* Unichar fullwidth 'A' */
809   g_assert_true (g_unichar_ispunct (0x005F));  /* Unichar fullwidth '.' */
810   g_assert_true (g_unichar_ispunct (0x058A));  /* Unichar fullwidth '-' */
811 
812   /*** Testing TYPE() border cases ***/
813   g_assert_false (g_unichar_ispunct (0x3FF5));
814   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
815   g_assert_false (g_unichar_ispunct (0xFFEFF));
816   /* U+E0001 Language Tag */
817   g_assert_false (g_unichar_ispunct (0xE0001));
818   g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR));
819   g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR + 1));
820   g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1));
821   g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1 + 1));
822 }
823 
824 /* Test that g_unichar_iscntrl() returns the correct value for various
825  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
826 static void
test_cntrl(void)827 test_cntrl (void)
828 {
829   g_assert_true (g_unichar_iscntrl (0x08));
830   g_assert_false (g_unichar_iscntrl ('a'));
831   g_assert_true (g_unichar_iscntrl (0x007F)); /* Unichar fullwidth <del> */
832   g_assert_true (g_unichar_iscntrl (0x009F)); /* Unichar fullwidth control */
833 
834   /*** Testing TYPE() border cases ***/
835   g_assert_false (g_unichar_iscntrl (0x3FF5));
836   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
837   g_assert_false (g_unichar_iscntrl (0xFFEFF));
838   /* U+E0001 Language Tag */
839   g_assert_false (g_unichar_iscntrl (0xE0001));
840   g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR));
841   g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR + 1));
842   g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1));
843   g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1 + 1));
844 }
845 
846 /* Test that g_unichar_isgraph() returns the correct value for various
847  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
848 static void
test_graph(void)849 test_graph (void)
850 {
851   g_assert_false (g_unichar_isgraph (0x08));
852   g_assert_false (g_unichar_isgraph (' '));
853   g_assert_true (g_unichar_isgraph ('a'));
854   g_assert_true (g_unichar_isgraph ('0'));
855   g_assert_true (g_unichar_isgraph ('9'));
856   g_assert_true (g_unichar_isgraph ('A'));
857   g_assert_true (g_unichar_isgraph ('-'));
858   g_assert_true (g_unichar_isgraph ('*'));
859   g_assert_true (g_unichar_isgraph (0xFF21));  /* Unichar fullwidth 'A' */
860   g_assert_true (g_unichar_isgraph (0xFF3A));  /* Unichar fullwidth 'Z' */
861   g_assert_true (g_unichar_isgraph (0xFF41));  /* Unichar fullwidth 'a' */
862   g_assert_true (g_unichar_isgraph (0xFF5A));  /* Unichar fullwidth 'z' */
863   g_assert_true (g_unichar_isgraph (0xFF10));  /* Unichar fullwidth '0' */
864   g_assert_true (g_unichar_isgraph (0xFF19));  /* Unichar fullwidth '9' */
865   g_assert_true (g_unichar_isgraph (0xFF0A));  /* Unichar fullwidth '*' */
866   g_assert_false (g_unichar_isgraph (0x007F)); /* Unichar fullwidth <del> */
867   g_assert_false (g_unichar_isgraph (0x009F)); /* Unichar fullwidth control */
868 
869   /*** Testing TYPE() border cases ***/
870   g_assert_true (g_unichar_isgraph (0x3FF5));
871   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
872   g_assert_true (g_unichar_isgraph (0xFFEFF));
873   /* U+E0001 Language Tag */
874   g_assert_false (g_unichar_isgraph (0xE0001));
875   g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR));
876   g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR + 1));
877   g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1));
878   g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1 + 1));
879 }
880 
881 /* Test that g_unichar_iszerowidth() returns the correct value for various
882  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
883 static void
test_zerowidth(void)884 test_zerowidth (void)
885 {
886   g_assert_false (g_unichar_iszerowidth (0x00AD));
887   g_assert_false (g_unichar_iszerowidth (0x115F));
888   g_assert_true (g_unichar_iszerowidth (0x1160));
889   g_assert_true (g_unichar_iszerowidth (0x11AA));
890   g_assert_true (g_unichar_iszerowidth (0x11FF));
891   g_assert_false (g_unichar_iszerowidth (0x1200));
892   g_assert_false (g_unichar_iszerowidth (0x200A));
893   g_assert_true (g_unichar_iszerowidth (0x200B));
894   g_assert_true (g_unichar_iszerowidth (0x200C));
895   g_assert_true (g_unichar_iszerowidth (0x591));
896 
897   /*** Testing TYPE() border cases ***/
898   g_assert_false (g_unichar_iszerowidth (0x3FF5));
899   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
900   g_assert_false (g_unichar_iszerowidth (0xFFEFF));
901   /* U+E0001 Language Tag */
902   g_assert_true (g_unichar_iszerowidth (0xE0001));
903   g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR));
904   g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR + 1));
905   g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1));
906   g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1 + 1));
907 }
908 
909 /* Test that g_unichar_istitle() returns the correct value for various
910  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
911 static void
test_title(void)912 test_title (void)
913 {
914   g_assert_true (g_unichar_istitle (0x01c5));
915   g_assert_true (g_unichar_istitle (0x1f88));
916   g_assert_true (g_unichar_istitle (0x1fcc));
917   g_assert_false (g_unichar_istitle ('a'));
918   g_assert_false (g_unichar_istitle ('A'));
919   g_assert_false (g_unichar_istitle (';'));
920 
921   /*** Testing TYPE() border cases ***/
922   g_assert_false (g_unichar_istitle (0x3FF5));
923   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
924   g_assert_false (g_unichar_istitle (0xFFEFF));
925   /* U+E0001 Language Tag */
926   g_assert_false (g_unichar_istitle (0xE0001));
927   g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR));
928   g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR + 1));
929   g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1));
930   g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1 + 1));
931 
932   g_assert_cmphex (g_unichar_totitle (0x0000), ==, 0x0000);
933   g_assert_cmphex (g_unichar_totitle (0x01c6), ==, 0x01c5);
934   g_assert_cmphex (g_unichar_totitle (0x01c4), ==, 0x01c5);
935   g_assert_cmphex (g_unichar_totitle (0x01c5), ==, 0x01c5);
936   g_assert_cmphex (g_unichar_totitle (0x1f80), ==, 0x1f88);
937   g_assert_cmphex (g_unichar_totitle (0x1f88), ==, 0x1f88);
938   g_assert_cmphex (g_unichar_totitle ('a'), ==, 'A');
939   g_assert_cmphex (g_unichar_totitle ('A'), ==, 'A');
940 
941   /*** Testing TYPE() border cases ***/
942   g_assert_cmphex (g_unichar_totitle (0x3FF5), ==, 0x3FF5);
943   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
944   g_assert_cmphex (g_unichar_totitle (0xFFEFF), ==, 0xFFEFF);
945   g_assert_cmphex (g_unichar_totitle (0xDFFFF), ==, 0xDFFFF);
946   /* U+E0001 Language Tag */
947   g_assert_cmphex (g_unichar_totitle (0xE0001), ==, 0xE0001);
948   g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR), ==,
949                    G_UNICODE_LAST_CHAR);
950   g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR + 1), ==,
951                    (G_UNICODE_LAST_CHAR + 1));
952   g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1), ==,
953                    (G_UNICODE_LAST_CHAR_PART1));
954   g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
955                    (G_UNICODE_LAST_CHAR_PART1 + 1));
956 }
957 
958 /* Test that g_unichar_isupper() returns the correct value for various
959  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
960 static void
test_upper(void)961 test_upper (void)
962 {
963   g_assert_false (g_unichar_isupper (' '));
964   g_assert_false (g_unichar_isupper ('0'));
965   g_assert_false (g_unichar_isupper ('a'));
966   g_assert_true (g_unichar_isupper ('A'));
967   g_assert_false (g_unichar_isupper (0xff41)); /* Unicode fullwidth 'a' */
968   g_assert_true (g_unichar_isupper (0xff21)); /* Unicode fullwidth 'A' */
969 
970   /*** Testing TYPE() border cases ***/
971   g_assert_false (g_unichar_isupper (0x3FF5));
972   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
973   g_assert_false (g_unichar_isupper (0xFFEFF));
974   /* U+E0001 Language Tag */
975   g_assert_false (g_unichar_isupper (0xE0001));
976   g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR));
977   g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR + 1));
978   g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1));
979   g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1 + 1));
980 }
981 
982 /* Test that g_unichar_islower() returns the correct value for various
983  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
984 static void
test_lower(void)985 test_lower (void)
986 {
987   g_assert_false (g_unichar_islower (' '));
988   g_assert_false (g_unichar_islower ('0'));
989   g_assert_true (g_unichar_islower ('a'));
990   g_assert_false (g_unichar_islower ('A'));
991   g_assert_true (g_unichar_islower (0xff41)); /* Unicode fullwidth 'a' */
992   g_assert_false (g_unichar_islower (0xff21)); /* Unicode fullwidth 'A' */
993 
994   /*** Testing TYPE() border cases ***/
995   g_assert_false (g_unichar_islower (0x3FF5));
996   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
997   g_assert_false (g_unichar_islower (0xFFEFF));
998   /* U+E0001 Language Tag */
999   g_assert_false (g_unichar_islower (0xE0001));
1000   g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR));
1001   g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR + 1));
1002   g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1));
1003   g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1 + 1));
1004 }
1005 
1006 /* Test that g_unichar_isprint() returns the correct value for various
1007  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1008 static void
test_print(void)1009 test_print (void)
1010 {
1011   g_assert_true (g_unichar_isprint (' '));
1012   g_assert_true (g_unichar_isprint ('0'));
1013   g_assert_true (g_unichar_isprint ('a'));
1014   g_assert_true (g_unichar_isprint ('A'));
1015   g_assert_true (g_unichar_isprint (0xff41)); /* Unicode fullwidth 'a' */
1016   g_assert_true (g_unichar_isprint (0xff21)); /* Unicode fullwidth 'A' */
1017 
1018   /*** Testing TYPE() border cases ***/
1019   g_assert_true (g_unichar_isprint (0x3FF5));
1020   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1021   g_assert_true (g_unichar_isprint (0xFFEFF));
1022   /* U+E0001 Language Tag */
1023   g_assert_false (g_unichar_isprint (0xE0001));
1024   g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR));
1025   g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR + 1));
1026   g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1));
1027   g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1 + 1));
1028 }
1029 
1030 /* Test that g_unichar_toupper() and g_unichar_tolower() return the
1031  * correct values for various ASCII and Unicode alphabetic, numeric,
1032  * and other, codepoints. */
1033 static void
test_cases(void)1034 test_cases (void)
1035 {
1036   g_assert_cmphex (g_unichar_toupper (0x0), ==, 0x0);
1037   g_assert_cmphex (g_unichar_tolower (0x0), ==, 0x0);
1038   g_assert_cmphex (g_unichar_toupper ('a'), ==, 'A');
1039   g_assert_cmphex (g_unichar_toupper ('A'), ==, 'A');
1040   /* Unicode fullwidth 'a' == 'A' */
1041   g_assert_cmphex (g_unichar_toupper (0xff41), ==, 0xff21);
1042   /* Unicode fullwidth 'A' == 'A' */
1043   g_assert_cmphex (g_unichar_toupper (0xff21), ==, 0xff21);
1044   g_assert_cmphex (g_unichar_toupper (0x01C5), ==, 0x01C4);
1045   g_assert_cmphex (g_unichar_toupper (0x01C6), ==, 0x01C4);
1046   g_assert_cmphex (g_unichar_tolower ('A'), ==, 'a');
1047   g_assert_cmphex (g_unichar_tolower ('a'), ==, 'a');
1048   /* Unicode fullwidth 'A' == 'a' */
1049   g_assert_cmphex (g_unichar_tolower (0xff21), ==, 0xff41);
1050   /* Unicode fullwidth 'a' == 'a' */
1051   g_assert_cmphex (g_unichar_tolower (0xff41), ==, 0xff41);
1052   g_assert_cmphex (g_unichar_tolower (0x01C4), ==, 0x01C6);
1053   g_assert_cmphex (g_unichar_tolower (0x01C5), ==, 0x01C6);
1054   g_assert_cmphex (g_unichar_tolower (0x1F8A), ==, 0x1F82);
1055   g_assert_cmphex (g_unichar_totitle (0x1F8A), ==, 0x1F8A);
1056   g_assert_cmphex (g_unichar_toupper (0x1F8A), ==, 0x1F8A);
1057   g_assert_cmphex (g_unichar_tolower (0x1FB2), ==, 0x1FB2);
1058   g_assert_cmphex (g_unichar_toupper (0x1FB2), ==, 0x1FB2);
1059 
1060   /* U+130 is a special case, it's a 'I' with a dot on top */
1061   g_assert_cmphex (g_unichar_tolower (0x130), ==, 0x69);
1062 
1063   /* Testing ATTTABLE() border cases */
1064   g_assert_cmphex (g_unichar_toupper (0x1D6FE), ==, 0x1D6FE);
1065 
1066   /*** Testing TYPE() border cases ***/
1067   g_assert_cmphex (g_unichar_toupper (0x3FF5), ==, 0x3FF5);
1068   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1069   g_assert_cmphex (g_unichar_toupper (0xFFEFF), ==, 0xFFEFF);
1070   g_assert_cmphex (g_unichar_toupper (0xDFFFF), ==, 0xDFFFF);
1071   /* U+E0001 Language Tag */
1072   g_assert_cmphex (g_unichar_toupper (0xE0001), ==, 0xE0001);
1073   g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR), ==,
1074                    G_UNICODE_LAST_CHAR);
1075   g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR + 1), ==,
1076                    (G_UNICODE_LAST_CHAR + 1));
1077   g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1), ==,
1078                    (G_UNICODE_LAST_CHAR_PART1));
1079   g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
1080                    (G_UNICODE_LAST_CHAR_PART1 + 1));
1081 
1082   /* Testing ATTTABLE() border cases */
1083   g_assert_cmphex (g_unichar_tolower (0x1D6FA), ==, 0x1D6FA);
1084 
1085   /*** Testing TYPE() border cases ***/
1086   g_assert_cmphex (g_unichar_tolower (0x3FF5), ==, 0x3FF5);
1087   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1088   g_assert_cmphex (g_unichar_tolower (0xFFEFF), ==, 0xFFEFF);
1089   g_assert_cmphex (g_unichar_tolower (0xDFFFF), ==, 0xDFFFF);
1090   /* U+E0001 Language Tag */
1091   g_assert_cmphex (g_unichar_tolower (0xE0001), ==, 0xE0001);
1092   g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR), ==,
1093                    G_UNICODE_LAST_CHAR);
1094   g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR + 1), ==,
1095                    (G_UNICODE_LAST_CHAR + 1));
1096   g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1), ==,
1097                    G_UNICODE_LAST_CHAR_PART1);
1098   g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
1099                    (G_UNICODE_LAST_CHAR_PART1 + 1));
1100 }
1101 
1102 /* Test that g_unichar_isdefined() returns the correct value for various
1103  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1104 static void
test_defined(void)1105 test_defined (void)
1106 {
1107   g_assert_true (g_unichar_isdefined (0x0903));
1108   g_assert_true (g_unichar_isdefined (0x20DD));
1109   g_assert_true (g_unichar_isdefined (0x20BA));
1110   g_assert_true (g_unichar_isdefined (0xA806));
1111   g_assert_true (g_unichar_isdefined ('a'));
1112   g_assert_false (g_unichar_isdefined (0x10C49));
1113   g_assert_false (g_unichar_isdefined (0x169D));
1114 
1115   /*** Testing TYPE() border cases ***/
1116   g_assert_true (g_unichar_isdefined (0x3FF5));
1117   /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1118   g_assert_true (g_unichar_isdefined (0xFFEFF));
1119   g_assert_false (g_unichar_isdefined (0xDFFFF));
1120   /* U+E0001 Language Tag */
1121   g_assert_true (g_unichar_isdefined (0xE0001));
1122   g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR));
1123   g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR + 1));
1124   g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1));
1125   g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1 + 1));
1126 }
1127 
1128 /* Test that g_unichar_iswide() returns the correct value for various
1129  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1130 static void
test_wide(void)1131 test_wide (void)
1132 {
1133   guint i;
1134   struct {
1135     gunichar c;
1136     enum {
1137       NOT_WIDE,
1138       WIDE_CJK,
1139       WIDE
1140     } wide;
1141   } examples[] = {
1142     /* Neutral */
1143     {   0x0000, NOT_WIDE },
1144     {   0x0483, NOT_WIDE },
1145     {   0x0641, NOT_WIDE },
1146     {   0xFFFC, NOT_WIDE },
1147     {  0x10000, NOT_WIDE },
1148     {  0xE0001, NOT_WIDE },
1149     {  0x2FFFE, NOT_WIDE },
1150     {  0x3FFFE, NOT_WIDE },
1151 
1152     /* Narrow */
1153     {   0x0020, NOT_WIDE },
1154     {   0x0041, NOT_WIDE },
1155     {   0x27E6, NOT_WIDE },
1156 
1157     /* Halfwidth */
1158     {   0x20A9, NOT_WIDE },
1159     {   0xFF61, NOT_WIDE },
1160     {   0xFF69, NOT_WIDE },
1161     {   0xFFEE, NOT_WIDE },
1162 
1163     /* Ambiguous */
1164     {   0x00A1, WIDE_CJK },
1165     {   0x00BE, WIDE_CJK },
1166     {   0x02DD, WIDE_CJK },
1167     {   0x2020, WIDE_CJK },
1168     {   0xFFFD, WIDE_CJK },
1169     {   0x00A1, WIDE_CJK },
1170     {  0x1F100, WIDE_CJK },
1171     {  0xE0100, WIDE_CJK },
1172     { 0x100000, WIDE_CJK },
1173     { 0x10FFFD, WIDE_CJK },
1174 
1175     /* Fullwidth */
1176     {   0x3000, WIDE },
1177     {   0xFF60, WIDE },
1178 
1179     /* Wide */
1180     {   0x2329, WIDE },
1181     {   0x3001, WIDE },
1182     {   0xFE69, WIDE },
1183     {  0x30000, WIDE },
1184     {  0x3FFFD, WIDE },
1185 
1186     /* Default Wide blocks */
1187     {   0x4DBF, WIDE },
1188     {   0x9FFF, WIDE },
1189     {   0xFAFF, WIDE },
1190     {  0x2A6DF, WIDE },
1191     {  0x2B73F, WIDE },
1192     {  0x2B81F, WIDE },
1193     {  0x2FA1F, WIDE },
1194 
1195     /* Uniode-5.2 character additions */
1196     /* Wide */
1197     {   0x115F, WIDE },
1198 
1199     /* Uniode-6.0 character additions */
1200     /* Wide */
1201     {  0x2B740, WIDE },
1202     {  0x1B000, WIDE },
1203 
1204     { 0x111111, NOT_WIDE }
1205   };
1206 
1207   for (i = 0; i < G_N_ELEMENTS (examples); i++)
1208     {
1209       g_assert_cmpint (g_unichar_iswide (examples[i].c), ==,
1210                        (examples[i].wide == WIDE));
1211       g_assert_cmpint (g_unichar_iswide_cjk (examples[i].c), ==,
1212                        (examples[i].wide != NOT_WIDE));
1213     }
1214 };
1215 
1216 /* Test that g_unichar_compose() returns the correct value for various
1217  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1218 static void
test_compose(void)1219 test_compose (void)
1220 {
1221   gunichar ch;
1222 
1223   /* Not composable */
1224   g_assert_false (g_unichar_compose (0x0041, 0x0042, &ch) && ch == 0);
1225   g_assert_false (g_unichar_compose (0x0041, 0, &ch) && ch == 0);
1226   g_assert_false (g_unichar_compose (0x0066, 0x0069, &ch) && ch == 0);
1227 
1228   /* Tricky non-composable */
1229   g_assert_false (g_unichar_compose (0x0308, 0x0301, &ch) && ch == 0); /* !0x0344 */
1230   g_assert_false (g_unichar_compose (0x0F71, 0x0F72, &ch) && ch == 0); /* !0x0F73 */
1231 
1232   /* Singletons should not compose */
1233   g_assert_false (g_unichar_compose (0x212B, 0, &ch) && ch == 0);
1234   g_assert_false (g_unichar_compose (0x00C5, 0, &ch) && ch == 0);
1235   g_assert_false (g_unichar_compose (0x2126, 0, &ch) && ch == 0);
1236   g_assert_false (g_unichar_compose (0x03A9, 0, &ch) && ch == 0);
1237 
1238   /* Pairs */
1239   g_assert_true (g_unichar_compose (0x0041, 0x030A, &ch) && ch == 0x00C5);
1240   g_assert_true (g_unichar_compose (0x006F, 0x0302, &ch) && ch == 0x00F4);
1241   g_assert_true (g_unichar_compose (0x1E63, 0x0307, &ch) && ch == 0x1E69);
1242   g_assert_true (g_unichar_compose (0x0073, 0x0323, &ch) && ch == 0x1E63);
1243   g_assert_true (g_unichar_compose (0x0064, 0x0307, &ch) && ch == 0x1E0B);
1244   g_assert_true (g_unichar_compose (0x0064, 0x0323, &ch) && ch == 0x1E0D);
1245 
1246   /* Hangul */
1247   g_assert_true (g_unichar_compose (0xD4CC, 0x11B6, &ch) && ch == 0xD4DB);
1248   g_assert_true (g_unichar_compose (0x1111, 0x1171, &ch) && ch == 0xD4CC);
1249   g_assert_true (g_unichar_compose (0xCE20, 0x11B8, &ch) && ch == 0xCE31);
1250   g_assert_true (g_unichar_compose (0x110E, 0x1173, &ch) && ch == 0xCE20);
1251 }
1252 
1253 /* Test that g_unichar_decompose() returns the correct value for various
1254  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1255 static void
test_decompose(void)1256 test_decompose (void)
1257 {
1258   gunichar a, b;
1259 
1260   /* Not decomposable */
1261   g_assert_false (g_unichar_decompose (0x0041, &a, &b) && a == 0x0041 && b == 0);
1262   g_assert_false (g_unichar_decompose (0xFB01, &a, &b) && a == 0xFB01 && b == 0);
1263 
1264   /* Singletons */
1265   g_assert_true (g_unichar_decompose (0x212B, &a, &b) && a == 0x00C5 && b == 0);
1266   g_assert_true (g_unichar_decompose (0x2126, &a, &b) && a == 0x03A9 && b == 0);
1267 
1268   /* Tricky pairs */
1269   g_assert_true (g_unichar_decompose (0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
1270   g_assert_true (g_unichar_decompose (0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
1271 
1272   /* Pairs */
1273   g_assert_true (g_unichar_decompose (0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
1274   g_assert_true (g_unichar_decompose (0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
1275   g_assert_true (g_unichar_decompose (0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307);
1276   g_assert_true (g_unichar_decompose (0x1E63, &a, &b) && a == 0x0073 && b == 0x0323);
1277   g_assert_true (g_unichar_decompose (0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307);
1278   g_assert_true (g_unichar_decompose (0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323);
1279 
1280   /* Hangul */
1281   g_assert_true (g_unichar_decompose (0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6);
1282   g_assert_true (g_unichar_decompose (0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171);
1283   g_assert_true (g_unichar_decompose (0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8);
1284   g_assert_true (g_unichar_decompose (0xCE20, &a, &b) && a == 0x110E && b == 0x1173);
1285 }
1286 
1287 /* Test that g_unichar_fully_decompose() returns the correct value for
1288  * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1289 static void
test_fully_decompose_canonical(void)1290 test_fully_decompose_canonical (void)
1291 {
1292   gunichar decomp[5];
1293   gsize len;
1294 
1295 #define TEST_DECOMP(ch, expected_len, a, b, c, d) \
1296   len = g_unichar_fully_decompose (ch, FALSE, decomp, G_N_ELEMENTS (decomp)); \
1297   g_assert_cmpint (expected_len, ==, len); \
1298   if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
1299   if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
1300   if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
1301   if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
1302 
1303 #define TEST0(ch)		TEST_DECOMP (ch, 1, ch, 0, 0, 0)
1304 #define TEST1(ch, a)		TEST_DECOMP (ch, 1, a, 0, 0, 0)
1305 #define TEST2(ch, a, b)		TEST_DECOMP (ch, 2, a, b, 0, 0)
1306 #define TEST3(ch, a, b, c)	TEST_DECOMP (ch, 3, a, b, c, 0)
1307 #define TEST4(ch, a, b, c, d)	TEST_DECOMP (ch, 4, a, b, c, d)
1308 
1309   /* Not decomposable */
1310   TEST0 (0x0041);
1311   TEST0 (0xFB01);
1312 
1313   /* Singletons */
1314   TEST2 (0x212B, 0x0041, 0x030A);
1315   TEST1 (0x2126, 0x03A9);
1316 
1317   /* Tricky pairs */
1318   TEST2 (0x0344, 0x0308, 0x0301);
1319   TEST2 (0x0F73, 0x0F71, 0x0F72);
1320 
1321   /* General */
1322   TEST2 (0x00C5, 0x0041, 0x030A);
1323   TEST2 (0x00F4, 0x006F, 0x0302);
1324   TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
1325   TEST2 (0x1E63, 0x0073, 0x0323);
1326   TEST2 (0x1E0B, 0x0064, 0x0307);
1327   TEST2 (0x1E0D, 0x0064, 0x0323);
1328 
1329   /* Hangul */
1330   TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
1331   TEST2 (0xD4CC, 0x1111, 0x1171);
1332   TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
1333   TEST2 (0xCE20, 0x110E, 0x1173);
1334 
1335 #undef TEST_DECOMP
1336 }
1337 
1338 /* Test that g_unicode_canonical_decomposition() returns the correct
1339  * value for various ASCII and Unicode alphabetic, numeric, and other,
1340  * codepoints. */
1341 static void
test_canonical_decomposition(void)1342 test_canonical_decomposition (void)
1343 {
1344   gunichar *decomp;
1345   gsize len;
1346 
1347 #define TEST_DECOMP(ch, expected_len, a, b, c, d) \
1348   decomp = g_unicode_canonical_decomposition (ch, &len); \
1349   g_assert_cmpint (expected_len, ==, len); \
1350   if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
1351   if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
1352   if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
1353   if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
1354   g_free (decomp);
1355 
1356 #define TEST0(ch)		TEST_DECOMP (ch, 1, ch, 0, 0, 0)
1357 #define TEST1(ch, a)		TEST_DECOMP (ch, 1, a, 0, 0, 0)
1358 #define TEST2(ch, a, b)		TEST_DECOMP (ch, 2, a, b, 0, 0)
1359 #define TEST3(ch, a, b, c)	TEST_DECOMP (ch, 3, a, b, c, 0)
1360 #define TEST4(ch, a, b, c, d)	TEST_DECOMP (ch, 4, a, b, c, d)
1361 
1362   /* Not decomposable */
1363   TEST0 (0x0041);
1364   TEST0 (0xFB01);
1365 
1366   /* Singletons */
1367   TEST2 (0x212B, 0x0041, 0x030A);
1368   TEST1 (0x2126, 0x03A9);
1369 
1370   /* Tricky pairs */
1371   TEST2 (0x0344, 0x0308, 0x0301);
1372   TEST2 (0x0F73, 0x0F71, 0x0F72);
1373 
1374   /* General */
1375   TEST2 (0x00C5, 0x0041, 0x030A);
1376   TEST2 (0x00F4, 0x006F, 0x0302);
1377   TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
1378   TEST2 (0x1E63, 0x0073, 0x0323);
1379   TEST2 (0x1E0B, 0x0064, 0x0307);
1380   TEST2 (0x1E0D, 0x0064, 0x0323);
1381 
1382   /* Hangul */
1383   TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
1384   TEST2 (0xD4CC, 0x1111, 0x1171);
1385   TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
1386   TEST2 (0xCE20, 0x110E, 0x1173);
1387 
1388 #undef TEST_DECOMP
1389 }
1390 
1391 /* Test that g_unichar_decompose() whenever encouttering a char ch
1392  * decomposes into a and b, b itself won't decompose any further. */
1393 static void
test_decompose_tail(void)1394 test_decompose_tail (void)
1395 {
1396   gunichar ch, a, b, c, d;
1397 
1398   /* Test that whenever a char ch decomposes into a and b, b itself
1399    * won't decompose any further. */
1400 
1401   for (ch = 0; ch < 0x110000; ch++)
1402     if (g_unichar_decompose (ch, &a, &b))
1403       g_assert_false (g_unichar_decompose (b, &c, &d));
1404     else
1405       {
1406         g_assert_cmpuint (a, ==, ch);
1407         g_assert_cmpuint (b, ==, 0);
1408       }
1409 }
1410 
1411 /* Test that all canonical decompositions of g_unichar_fully_decompose()
1412  * are at most 4 in length, and compatibility decompositions are
1413  * at most 18 in length. */
1414 static void
test_fully_decompose_len(void)1415 test_fully_decompose_len (void)
1416 {
1417   gunichar ch;
1418 
1419   /* Test that all canonical decompositions are at most 4 in length,
1420    * and compatibility decompositions are at most 18 in length.
1421    */
1422 
1423   for (ch = 0; ch < 0x110000; ch++) {
1424     g_assert_cmpint (g_unichar_fully_decompose (ch, FALSE, NULL, 0), <=, 4);
1425     g_assert_cmpint (g_unichar_fully_decompose (ch, TRUE,  NULL, 0), <=, 18);
1426   }
1427 }
1428 
1429 /* Check various examples from Unicode Annex #15 for NFD and NFC
1430  * normalization.
1431  */
1432 static void
test_normalization(void)1433 test_normalization (void)
1434 {
1435   const struct {
1436     const char *source;
1437     const char *nfd;
1438     const char *nfc;
1439   } tests[] = {
1440     // Singletons
1441     { "\xe2\x84\xab", "A\xcc\x8a", "Å" }, // U+212B ANGSTROM SIGN
1442     { "\xe2\x84\xa6", "Ω", "Ω" }, // U+2126 OHM SIGN
1443     // Canonical Composites
1444     { "Å", "A\xcc\x8a", "Å" }, // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
1445     { "ô", "o\xcc\x82", "ô" }, // U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
1446     // Multiple Combining Marks
1447     { "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "ṩ" }, // U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
1448     { "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "ḍ̇" },
1449     { "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q̣̇" },
1450     // Compatibility Composites
1451     { "fi", "fi", "fi" }, // U+FB01 LATIN SMALL LIGATURE FI
1452     { "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2⁵" },
1453     { "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "ẛ̣" },
1454 
1455     // Tests for behavior with reordered marks
1456     { "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "ṩ" },
1457     { "α\xcc\x94\xcd\x82", "α\xcc\x94\xcd\x82", "ἇ" },
1458     { "α\xcd\x82\xcc\x94", "α\xcd\x82\xcc\x94", "ᾶ\xcc\x94" },
1459   };
1460   gsize i;
1461 
1462   for (i = 0; i < G_N_ELEMENTS (tests); i++)
1463     {
1464       char *nfd, *nfc;
1465 
1466       nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
1467       g_assert_cmpstr (nfd, ==, tests[i].nfd);
1468 
1469       nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC);
1470       g_assert_cmpstr (nfc, ==, tests[i].nfc);
1471 
1472       g_free (nfd);
1473       g_free (nfc);
1474     }
1475 }
1476 
1477 static void
test_iso15924(void)1478 test_iso15924 (void)
1479 {
1480   const struct {
1481     GUnicodeScript script;
1482     char four_letter_code[5];
1483   } data[] = {
1484     { G_UNICODE_SCRIPT_COMMON,             "Zyyy" },
1485     { G_UNICODE_SCRIPT_INHERITED,          "Zinh" },
1486     { G_UNICODE_SCRIPT_ARABIC,             "Arab" },
1487     { G_UNICODE_SCRIPT_ARMENIAN,           "Armn" },
1488     { G_UNICODE_SCRIPT_BENGALI,            "Beng" },
1489     { G_UNICODE_SCRIPT_BOPOMOFO,           "Bopo" },
1490     { G_UNICODE_SCRIPT_CHEROKEE,           "Cher" },
1491     { G_UNICODE_SCRIPT_COPTIC,             "Copt" },
1492     { G_UNICODE_SCRIPT_CYRILLIC,           "Cyrl" },
1493     { G_UNICODE_SCRIPT_DESERET,            "Dsrt" },
1494     { G_UNICODE_SCRIPT_DEVANAGARI,         "Deva" },
1495     { G_UNICODE_SCRIPT_ETHIOPIC,           "Ethi" },
1496     { G_UNICODE_SCRIPT_GEORGIAN,           "Geor" },
1497     { G_UNICODE_SCRIPT_GOTHIC,             "Goth" },
1498     { G_UNICODE_SCRIPT_GREEK,              "Grek" },
1499     { G_UNICODE_SCRIPT_GUJARATI,           "Gujr" },
1500     { G_UNICODE_SCRIPT_GURMUKHI,           "Guru" },
1501     { G_UNICODE_SCRIPT_HAN,                "Hani" },
1502     { G_UNICODE_SCRIPT_HANGUL,             "Hang" },
1503     { G_UNICODE_SCRIPT_HEBREW,             "Hebr" },
1504     { G_UNICODE_SCRIPT_HIRAGANA,           "Hira" },
1505     { G_UNICODE_SCRIPT_KANNADA,            "Knda" },
1506     { G_UNICODE_SCRIPT_KATAKANA,           "Kana" },
1507     { G_UNICODE_SCRIPT_KHMER,              "Khmr" },
1508     { G_UNICODE_SCRIPT_LAO,                "Laoo" },
1509     { G_UNICODE_SCRIPT_LATIN,              "Latn" },
1510     { G_UNICODE_SCRIPT_MALAYALAM,          "Mlym" },
1511     { G_UNICODE_SCRIPT_MONGOLIAN,          "Mong" },
1512     { G_UNICODE_SCRIPT_MYANMAR,            "Mymr" },
1513     { G_UNICODE_SCRIPT_OGHAM,              "Ogam" },
1514     { G_UNICODE_SCRIPT_OLD_ITALIC,         "Ital" },
1515     { G_UNICODE_SCRIPT_ORIYA,              "Orya" },
1516     { G_UNICODE_SCRIPT_RUNIC,              "Runr" },
1517     { G_UNICODE_SCRIPT_SINHALA,            "Sinh" },
1518     { G_UNICODE_SCRIPT_SYRIAC,             "Syrc" },
1519     { G_UNICODE_SCRIPT_TAMIL,              "Taml" },
1520     { G_UNICODE_SCRIPT_TELUGU,             "Telu" },
1521     { G_UNICODE_SCRIPT_THAANA,             "Thaa" },
1522     { G_UNICODE_SCRIPT_THAI,               "Thai" },
1523     { G_UNICODE_SCRIPT_TIBETAN,            "Tibt" },
1524     { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, "Cans" },
1525     { G_UNICODE_SCRIPT_YI,                 "Yiii" },
1526     { G_UNICODE_SCRIPT_TAGALOG,            "Tglg" },
1527     { G_UNICODE_SCRIPT_HANUNOO,            "Hano" },
1528     { G_UNICODE_SCRIPT_BUHID,              "Buhd" },
1529     { G_UNICODE_SCRIPT_TAGBANWA,           "Tagb" },
1530 
1531     /* Unicode-4.0 additions */
1532     { G_UNICODE_SCRIPT_BRAILLE,            "Brai" },
1533     { G_UNICODE_SCRIPT_CYPRIOT,            "Cprt" },
1534     { G_UNICODE_SCRIPT_LIMBU,              "Limb" },
1535     { G_UNICODE_SCRIPT_OSMANYA,            "Osma" },
1536     { G_UNICODE_SCRIPT_SHAVIAN,            "Shaw" },
1537     { G_UNICODE_SCRIPT_LINEAR_B,           "Linb" },
1538     { G_UNICODE_SCRIPT_TAI_LE,             "Tale" },
1539     { G_UNICODE_SCRIPT_UGARITIC,           "Ugar" },
1540 
1541     /* Unicode-4.1 additions */
1542     { G_UNICODE_SCRIPT_NEW_TAI_LUE,        "Talu" },
1543     { G_UNICODE_SCRIPT_BUGINESE,           "Bugi" },
1544     { G_UNICODE_SCRIPT_GLAGOLITIC,         "Glag" },
1545     { G_UNICODE_SCRIPT_TIFINAGH,           "Tfng" },
1546     { G_UNICODE_SCRIPT_SYLOTI_NAGRI,       "Sylo" },
1547     { G_UNICODE_SCRIPT_OLD_PERSIAN,        "Xpeo" },
1548     { G_UNICODE_SCRIPT_KHAROSHTHI,         "Khar" },
1549 
1550     /* Unicode-5.0 additions */
1551     { G_UNICODE_SCRIPT_UNKNOWN,            "Zzzz" },
1552     { G_UNICODE_SCRIPT_BALINESE,           "Bali" },
1553     { G_UNICODE_SCRIPT_CUNEIFORM,          "Xsux" },
1554     { G_UNICODE_SCRIPT_PHOENICIAN,         "Phnx" },
1555     { G_UNICODE_SCRIPT_PHAGS_PA,           "Phag" },
1556     { G_UNICODE_SCRIPT_NKO,                "Nkoo" },
1557 
1558     /* Unicode-5.1 additions */
1559     { G_UNICODE_SCRIPT_KAYAH_LI,           "Kali" },
1560     { G_UNICODE_SCRIPT_LEPCHA,             "Lepc" },
1561     { G_UNICODE_SCRIPT_REJANG,             "Rjng" },
1562     { G_UNICODE_SCRIPT_SUNDANESE,          "Sund" },
1563     { G_UNICODE_SCRIPT_SAURASHTRA,         "Saur" },
1564     { G_UNICODE_SCRIPT_CHAM,               "Cham" },
1565     { G_UNICODE_SCRIPT_OL_CHIKI,           "Olck" },
1566     { G_UNICODE_SCRIPT_VAI,                "Vaii" },
1567     { G_UNICODE_SCRIPT_CARIAN,             "Cari" },
1568     { G_UNICODE_SCRIPT_LYCIAN,             "Lyci" },
1569     { G_UNICODE_SCRIPT_LYDIAN,             "Lydi" },
1570 
1571     /* Unicode-5.2 additions */
1572     { G_UNICODE_SCRIPT_AVESTAN,                "Avst" },
1573     { G_UNICODE_SCRIPT_BAMUM,                  "Bamu" },
1574     { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,   "Egyp" },
1575     { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,       "Armi" },
1576     { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,  "Phli" },
1577     { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti" },
1578     { G_UNICODE_SCRIPT_JAVANESE,               "Java" },
1579     { G_UNICODE_SCRIPT_KAITHI,                 "Kthi" },
1580     { G_UNICODE_SCRIPT_LISU,                   "Lisu" },
1581     { G_UNICODE_SCRIPT_MEETEI_MAYEK,           "Mtei" },
1582     { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,      "Sarb" },
1583     { G_UNICODE_SCRIPT_OLD_TURKIC,             "Orkh" },
1584     { G_UNICODE_SCRIPT_SAMARITAN,              "Samr" },
1585     { G_UNICODE_SCRIPT_TAI_THAM,               "Lana" },
1586     { G_UNICODE_SCRIPT_TAI_VIET,               "Tavt" },
1587 
1588     /* Unicode-6.0 additions */
1589     { G_UNICODE_SCRIPT_BATAK,                  "Batk" },
1590     { G_UNICODE_SCRIPT_BRAHMI,                 "Brah" },
1591     { G_UNICODE_SCRIPT_MANDAIC,                "Mand" },
1592 
1593     /* Unicode-6.1 additions */
1594     { G_UNICODE_SCRIPT_CHAKMA,                 "Cakm" },
1595     { G_UNICODE_SCRIPT_MEROITIC_CURSIVE,       "Merc" },
1596     { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,   "Mero" },
1597     { G_UNICODE_SCRIPT_MIAO,                   "Plrd" },
1598     { G_UNICODE_SCRIPT_SHARADA,                "Shrd" },
1599     { G_UNICODE_SCRIPT_SORA_SOMPENG,           "Sora" },
1600     { G_UNICODE_SCRIPT_TAKRI,                  "Takr" },
1601 
1602     /* Unicode 7.0 additions */
1603     { G_UNICODE_SCRIPT_BASSA_VAH,              "Bass" },
1604     { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN,     "Aghb" },
1605     { G_UNICODE_SCRIPT_DUPLOYAN,               "Dupl" },
1606     { G_UNICODE_SCRIPT_ELBASAN,                "Elba" },
1607     { G_UNICODE_SCRIPT_GRANTHA,                "Gran" },
1608     { G_UNICODE_SCRIPT_KHOJKI,                 "Khoj" },
1609     { G_UNICODE_SCRIPT_KHUDAWADI,              "Sind" },
1610     { G_UNICODE_SCRIPT_LINEAR_A,               "Lina" },
1611     { G_UNICODE_SCRIPT_MAHAJANI,               "Mahj" },
1612     { G_UNICODE_SCRIPT_MANICHAEAN,             "Mani" },
1613     { G_UNICODE_SCRIPT_MENDE_KIKAKUI,          "Mend" },
1614     { G_UNICODE_SCRIPT_MODI,                   "Modi" },
1615     { G_UNICODE_SCRIPT_MRO,                    "Mroo" },
1616     { G_UNICODE_SCRIPT_NABATAEAN,              "Nbat" },
1617     { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN,      "Narb" },
1618     { G_UNICODE_SCRIPT_OLD_PERMIC,             "Perm" },
1619     { G_UNICODE_SCRIPT_PAHAWH_HMONG,           "Hmng" },
1620     { G_UNICODE_SCRIPT_PALMYRENE,              "Palm" },
1621     { G_UNICODE_SCRIPT_PAU_CIN_HAU,            "Pauc" },
1622     { G_UNICODE_SCRIPT_PSALTER_PAHLAVI,        "Phlp" },
1623     { G_UNICODE_SCRIPT_SIDDHAM,                "Sidd" },
1624     { G_UNICODE_SCRIPT_TIRHUTA,                "Tirh" },
1625     { G_UNICODE_SCRIPT_WARANG_CITI,            "Wara" },
1626 
1627     /* Unicode 8.0 additions */
1628     { G_UNICODE_SCRIPT_AHOM,                   "Ahom" },
1629     { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS,  "Hluw" },
1630     { G_UNICODE_SCRIPT_HATRAN,                 "Hatr" },
1631     { G_UNICODE_SCRIPT_MULTANI,                "Mult" },
1632     { G_UNICODE_SCRIPT_OLD_HUNGARIAN,          "Hung" },
1633     { G_UNICODE_SCRIPT_SIGNWRITING,            "Sgnw" },
1634 
1635     /* Unicode 9.0 additions */
1636     { G_UNICODE_SCRIPT_ADLAM,                  "Adlm" },
1637     { G_UNICODE_SCRIPT_BHAIKSUKI,              "Bhks" },
1638     { G_UNICODE_SCRIPT_MARCHEN,                "Marc" },
1639     { G_UNICODE_SCRIPT_NEWA,                   "Newa" },
1640     { G_UNICODE_SCRIPT_OSAGE,                  "Osge" },
1641     { G_UNICODE_SCRIPT_TANGUT,                 "Tang" },
1642 
1643     /* Unicode 10.0 additions */
1644     { G_UNICODE_SCRIPT_MASARAM_GONDI,          "Gonm" },
1645     { G_UNICODE_SCRIPT_NUSHU,                  "Nshu" },
1646     { G_UNICODE_SCRIPT_SOYOMBO,                "Soyo" },
1647     { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE,       "Zanb" },
1648 
1649     /* Unicode 11.0 additions */
1650     { G_UNICODE_SCRIPT_DOGRA,                  "Dogr" },
1651     { G_UNICODE_SCRIPT_GUNJALA_GONDI,          "Gong" },
1652     { G_UNICODE_SCRIPT_HANIFI_ROHINGYA,        "Rohg" },
1653     { G_UNICODE_SCRIPT_MAKASAR,                "Maka" },
1654     { G_UNICODE_SCRIPT_MEDEFAIDRIN,            "Medf" },
1655     { G_UNICODE_SCRIPT_OLD_SOGDIAN,            "Sogo" },
1656     { G_UNICODE_SCRIPT_SOGDIAN,                "Sogd" },
1657 
1658     /* Unicode 12.0 additions */
1659     { G_UNICODE_SCRIPT_ELYMAIC,                "Elym" },
1660     { G_UNICODE_SCRIPT_NANDINAGARI,            "Nand" },
1661     { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, "Hmnp" },
1662     { G_UNICODE_SCRIPT_WANCHO,                 "Wcho" },
1663 
1664     /* Unicode 13.0 additions */
1665     { G_UNICODE_SCRIPT_CHORASMIAN,             "Chrs" },
1666     { G_UNICODE_SCRIPT_DIVES_AKURU,            "Diak" },
1667     { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT,    "Kits" },
1668     { G_UNICODE_SCRIPT_YEZIDI,                 "Yezi" },
1669   };
1670   guint i;
1671 
1672   g_assert_cmphex (0, ==,
1673                    g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
1674   g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
1675   g_assert_cmphex (0x41726162, ==,
1676                    g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));
1677 
1678   g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==,
1679                    g_unicode_script_from_iso15924 (0));
1680   g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==,
1681                    g_unicode_script_from_iso15924 (0x12345678));
1682 
1683 #define PACK(a,b,c,d) \
1684   ((guint32)((((guint8)(a))<<24)|(((guint8)(b))<<16)|(((guint8)(c))<<8)|((guint8)(d))))
1685 
1686   for (i = 0; i < G_N_ELEMENTS (data); i++)
1687     {
1688       guint32 code = PACK (data[i].four_letter_code[0],
1689                            data[i].four_letter_code[1],
1690                            data[i].four_letter_code[2],
1691                            data[i].four_letter_code[3]);
1692 
1693       g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
1694       g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
1695     }
1696 
1697 #undef PACK
1698 }
1699 
1700 int
main(int argc,char * argv[])1701 main (int   argc,
1702       char *argv[])
1703 {
1704   g_test_init (&argc, &argv, NULL);
1705 
1706   g_test_add_func ("/unicode/alnum", test_alnum);
1707   g_test_add_func ("/unicode/alpha", test_alpha);
1708   g_test_add_func ("/unicode/break-type", test_unichar_break_type);
1709   g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
1710   g_test_add_func ("/unicode/casefold", test_casefold);
1711   g_test_add_func ("/unicode/cases", test_cases);
1712   g_test_add_func ("/unicode/character-type", test_unichar_character_type);
1713   g_test_add_func ("/unicode/cntrl", test_cntrl);
1714   g_test_add_func ("/unicode/combining-class", test_combining_class);
1715   g_test_add_func ("/unicode/compose", test_compose);
1716   g_test_add_func ("/unicode/decompose", test_decompose);
1717   g_test_add_func ("/unicode/decompose-tail", test_decompose_tail);
1718   g_test_add_func ("/unicode/defined", test_defined);
1719   g_test_add_func ("/unicode/digit", test_digit);
1720   g_test_add_func ("/unicode/digit-value", test_digit_value);
1721   g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
1722   g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
1723   g_test_add_func ("/unicode/normalization", test_normalization);
1724   g_test_add_func ("/unicode/graph", test_graph);
1725   g_test_add_func ("/unicode/iso15924", test_iso15924);
1726   g_test_add_func ("/unicode/lower", test_lower);
1727   g_test_add_func ("/unicode/mark", test_mark);
1728   g_test_add_func ("/unicode/mirror", test_mirror);
1729   g_test_add_func ("/unicode/print", test_print);
1730   g_test_add_func ("/unicode/punctuation", test_punctuation);
1731   g_test_add_func ("/unicode/script", test_unichar_script);
1732   g_test_add_func ("/unicode/space", test_space);
1733   g_test_add_func ("/unicode/strdown", test_strdown);
1734   g_test_add_func ("/unicode/strup", test_strup);
1735   g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
1736   g_test_add_func ("/unicode/title", test_title);
1737   g_test_add_func ("/unicode/upper", test_upper);
1738   g_test_add_func ("/unicode/validate", test_unichar_validate);
1739   g_test_add_func ("/unicode/wide", test_wide);
1740   g_test_add_func ("/unicode/xdigit", test_xdigit);
1741   g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
1742   g_test_add_func ("/unicode/zero-width", test_zerowidth);
1743 
1744   return g_test_run();
1745 }
1746