1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #if !UCONFIG_NO_NORMALIZATION
26
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33
/* Forward declarations of the file-local test functions that addNormTest() registers. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
68
/*
 * Canonical normalization cases.
 * Columns: [0] input, [1] expected decomposed form, [2] expected composed form.
 * The \uXXXX sequences are stored as literal text (doubled backslash); the
 * tests hand each string to CharsToUChars() before using it.
 */
static const char* const canonTests[][3] = {
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above, precomposed and as a sequence */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D with dot_below and dot_above in every input arrangement */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* U+1E10 followed by U+0307 U+0323 */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D dot_above ogonek dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave, E-macron + grave, E-grave + macron */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom_sign and A-ring */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana), ka + ten */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten, ka + hw_ten, hw_ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },
    /* A followed by U+0300 U+0316 */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },
    { "", "", "" }
};
105
/*
 * Compatibility normalization cases.
 * Columns: [0] input, [1] expected decomposed form, [2] expected composed form.
 * The \uXXXX sequences are stored as literal text (doubled backslash); the
 * tests hand each string to CharsToUChars() before using it.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana), ka + ten */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later: */
    /* hw_ka + hw_ten, ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    { "", "", "" }
};
128
/*
 * Cases for the FCD mode: [0] input, [1] expected result, [2] unused (NULL).
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134
135 void addNormTest(TestNode** root);
136
addNormTest(TestNode ** root)137 void addNormTest(TestNode** root)
138 {
139 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 }
160
/* Printable names for log messages; the tests index this table with a UNormalizationMode value. */
static const char* const modeStrings[]={
    "?",                /* [0] */
    "UNORM_NONE",       /* [1] */
    "UNORM_NFD",        /* [2] */
    "UNORM_NFKD",       /* [3] */
    "UNORM_NFC",        /* [4] */
    "UNORM_NFKC",       /* [5] */
    "UNORM_FCD",        /* [6] */
    "UNORM_MODE_COUNT"  /* [7] */
};
171
/*
 * Runs unorm_normalize() in the given mode over a table of cases.
 *
 * For every row the input (column 0) is normalized four times: preflighting
 * with an explicit length, preflighting as a NUL-terminated string, and then
 * into a real buffer with both length conventions. The two preflight lengths
 * must agree, each real call must return the preflight length, and the output
 * is compared against column 2 for NFC/NFKC or column 1 for every other mode.
 *
 * mode           normalization mode to test
 * cases          rows of { input, decomposed, composed } strings
 * lengthOfCases  number of rows in cases
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    const int32_t expectedColumn = (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar normalized[16];
    int32_t row;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(row=0; row<lengthOfCases; ++row) {
        UErrorCode status = U_ZERO_ERROR;
        UErrorCode nulStatus = U_ZERO_ERROR;
        const char *expected = cases[row][expectedColumn];
        UChar *source = CharsToUChars(cases[row][0]);
        const int32_t sourceLength = u_strlen(source);
        int32_t preflightLength, otherLength;

        /* Preflight with both length conventions; the answers must match. */
        preflightLength = unorm_normalize(source, sourceLength, mode, 0, NULL, 0, &status);
        otherLength = unorm_normalize(source, -1, mode, 0, NULL, 0, &nulStatus);
        if(otherLength!=preflightLength) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)row, (int)preflightLength, (int)otherLength);
        }
        /* An overflow is the expected outcome of preflighting; clear it. */
        if(U_BUFFER_OVERFLOW_ERROR==status) {
            status = U_ZERO_ERROR;
        }

        /* Real normalization, explicit source length. */
        otherLength = unorm_normalize(source, sourceLength, mode, 0,
                                      normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && otherLength==preflightLength) {
            assertEqual(normalized, expected, row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* Real normalization, NUL-terminated source. */
        otherLength = unorm_normalize(source, -1, mode, 0,
                                      normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && otherLength==preflightLength) {
            assertEqual(normalized, expected, row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
211
TestDecomp()212 void TestDecomp() {
213 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
214 }
215
TestCompatDecomp()216 void TestCompatDecomp() {
217 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
218 }
219
TestCanonDecompCompose()220 void TestCanonDecompCompose() {
221 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
222 }
223
TestCompatDecompCompose()224 void TestCompatDecompCompose() {
225 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
226 }
227
TestFCD()228 void TestFCD() {
229 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
230 }
231
/*
 * Logs an error when the NUL-terminated UChar string result differs from
 * expected after expected has been converted with CharsToUChars().
 *
 * result    normalized output to check
 * expected  expected text in the char form used by the test tables
 * index     table row, used only in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUChars = CharsToUChars(expected);
    const int32_t difference = u_strcmp(result, expectedUChars);

    if(0 != difference) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
                austrdup(result) );
    }
    free(expectedUChars);
}
241
/*
 * Normalizes src (which may contain embedded U+0000) and compares the output
 * with exp.
 *
 * src, srcLen  input and its explicit length
 * exp, expLen  expected output and its length
 * mode         normalization mode to apply
 * name         mode name used in log messages
 *
 * A failed call returns immediately: the returned length may then exceed the
 * capacity of the local buffer, so the output must not be walked. On a length
 * mismatch only the prefix that both arrays really contain is compared, so
 * neither exp[] nor the buffer is read out of bounds, and "OK" is logged only
 * when length and contents both match.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                           UChar *exp, int32_t expLen,
                           UNormalizationMode mode,
                           const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar result[50];
    int32_t len, compareLen, i;

    /* Pre-fill with a sentinel so untouched slots are recognizable. */
    for(i=0; i<UPRV_LENGTHOF(result); i++) {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        return;
    }
    if(len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, (int)expLen, (int)len);
    }

    compareLen = (len < expLen) ? len : expLen;
    for(i=0; i<compareLen; i++) {
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    (int)i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose(" %d: \\u%04X\n", (int)i, result[i]);
    }

    if(len == expLen) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
284
TestNull()285 void TestNull()
286 {
287
288 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289 int32_t source_comp_len = 4;
290 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
291 int32_t expect_comp_len = 3;
292
293 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
294 int32_t source_dcmp_len = 3;
295 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296 int32_t expect_dcmp_len = 5;
297
298 TestNull_check(source_comp,
299 source_comp_len,
300 expect_comp,
301 expect_comp_len,
302 UNORM_NFC,
303 "UNORM_NFC");
304
305 TestNull_check(source_dcmp,
306 source_dcmp_len,
307 expect_dcmp,
308 expect_dcmp_len,
309 UNORM_NFD,
310 "UNORM_NFD");
311
312 TestNull_check(source_comp,
313 source_comp_len,
314 expect_comp,
315 expect_comp_len,
316 UNORM_NFKC,
317 "UNORM_NFKC");
318
319
320 }
321
TestQuickCheckResultNO()322 static void TestQuickCheckResultNO()
323 {
324 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332
333
334 const int SIZE = 10;
335
336 int count = 0;
337 UErrorCode error = U_ZERO_ERROR;
338
339 for (; count < SIZE; count ++)
340 {
341 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
342 UNORM_NO)
343 {
344 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
345 return;
346 }
347 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
348 UNORM_NO)
349 {
350 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
351 return;
352 }
353 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
354 UNORM_NO)
355 {
356 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
357 return;
358 }
359 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
360 UNORM_NO)
361 {
362 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
363 return;
364 }
365 }
366 }
367
368
TestQuickCheckResultYES()369 static void TestQuickCheckResultYES()
370 {
371 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379
380 const int SIZE = 10;
381 int count = 0;
382 UErrorCode error = U_ZERO_ERROR;
383
384 UChar cp = 0;
385 while (cp < 0xA0)
386 {
387 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
388 {
389 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
390 return;
391 }
392 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
393 UNORM_YES)
394 {
395 log_err("ERROR in NFC quick check at U+%04x\n", cp);
396 return;
397 }
398 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
399 {
400 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
401 return;
402 }
403 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
404 UNORM_YES)
405 {
406 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
407 return;
408 }
409 cp ++;
410 }
411
412 for (; count < SIZE; count ++)
413 {
414 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
415 UNORM_YES)
416 {
417 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
418 return;
419 }
420 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
421 != UNORM_YES)
422 {
423 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
424 return;
425 }
426 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
427 UNORM_YES)
428 {
429 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
430 return;
431 }
432 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
433 UNORM_YES)
434 {
435 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
436 return;
437 }
438 }
439 }
440
TestQuickCheckResultMAYBE()441 static void TestQuickCheckResultMAYBE()
442 {
443 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
447
448
449 const int SIZE = 10;
450
451 int count = 0;
452 UErrorCode error = U_ZERO_ERROR;
453
454 /* NFD and NFKD does not have any MAYBE codepoints */
455 for (; count < SIZE; count ++)
456 {
457 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
458 UNORM_MAYBE)
459 {
460 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
461 return;
462 }
463 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
464 UNORM_MAYBE)
465 {
466 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
467 return;
468 }
469 }
470 }
471
TestQuickCheckStringResult()472 static void TestQuickCheckStringResult()
473 {
474 int count;
475 UChar *d = NULL;
476 UChar *c = NULL;
477 UErrorCode error = U_ZERO_ERROR;
478
479 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
480 {
481 d = CharsToUChars(canonTests[count][1]);
482 c = CharsToUChars(canonTests[count][2]);
483 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
484 UNORM_YES)
485 {
486 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
487 free(d); free(c);
488 return;
489 }
490
491 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
492 UNORM_NO)
493 {
494 log_err("ERROR in NFC quick check for string at count %d\n", count);
495 free(d); free(c);
496 return;
497 }
498
499 free(d);
500 free(c);
501 }
502
503 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
504 {
505 d = CharsToUChars(compatTests[count][1]);
506 c = CharsToUChars(compatTests[count][2]);
507 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
508 UNORM_YES)
509 {
510 log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
511 free(d); free(c);
512 return;
513 }
514
515 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
516 UNORM_YES)
517 {
518 log_err("ERROR in NFKC quick check for string at count %d\n", count);
519 free(d); free(c);
520 return;
521 }
522
523 free(d);
524 free(c);
525 }
526 }
527
/*
 * Entry point for the unorm_quickCheck() tests: runs the NO, YES and MAYBE
 * single-code-unit checks and then the whole-string checks, in that order.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
535
536 /*
537 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
538 * normalized, and some that are not.
539 * Here we pick some specific cases and test the C API.
540 */
TestIsNormalized(void)541 static void TestIsNormalized(void) {
542 static const UChar notNFC[][8]={ /* strings that are not in NFC */
543 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
544 { 0xfb1d, 0 }, /* excluded from composition */
545 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
546 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
547 };
548 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
549 { 0x1100, 0x1161, 0 }, /* Jamo compose */
550 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
551 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
552 };
553
554 int32_t i;
555 UErrorCode errorCode;
556
557 /* API test */
558
559 /* normal case with length>=0 (length -1 used for special cases below) */
560 errorCode=U_ZERO_ERROR;
561 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
562 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
563 }
564
565 /* incoming U_FAILURE */
566 errorCode=U_TRUNCATED_CHAR_FOUND;
567 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
568 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
569 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
570 }
571
572 /* NULL source */
573 errorCode=U_ZERO_ERROR;
574 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
575 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
576 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
577 }
578
579 /* bad length */
580 errorCode=U_ZERO_ERROR;
581 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
582 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
583 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
584 }
585
586 /* specific cases */
587 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
588 errorCode=U_ZERO_ERROR;
589 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
590 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591 }
592 errorCode=U_ZERO_ERROR;
593 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
594 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
595 }
596 }
597 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
598 errorCode=U_ZERO_ERROR;
599 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
600 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
601 }
602 }
603 }
604
TestCheckFCD()605 void TestCheckFCD()
606 {
607 UErrorCode status = U_ZERO_ERROR;
608 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
609 0x0A};
610 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
611 0x02B9, 0x0314, 0x0315, 0x0316};
612 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
613 0x0050, 0x0730, 0x09EE, 0x1E10};
614
615 static const UChar datastr[][5] =
616 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
617 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
618 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
619 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
620 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
621
622 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
623 0x6a,
624 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
625 0xea,
626 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
627 0x0307, 0x0308, 0x0309, 0x030a,
628 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
629 0x0327, 0x0328, 0x0329, 0x032a,
630 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
631 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
632
633 int count = 0;
634
635 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
636 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
638 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
639 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
640 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
641
642 if (U_FAILURE(status))
643 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
644
645 while (count < 4)
646 {
647 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
648 if (U_FAILURE(status)) {
649 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data set %d - (Are you missing data?)\n", count);
650 break;
651 }
652 else {
653 if (result[count] != fcdresult) {
654 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
655 result[count]);
656 }
657 }
658 count ++;
659 }
660
661 /* random checks of long strings */
662 status = U_ZERO_ERROR;
663 srand((unsigned)time( NULL ));
664
665 for (count = 0; count < 50; count ++)
666 {
667 int size = 0;
668 UNormalizationCheckResult testresult = UNORM_YES;
669 UChar data[20];
670 UChar norm[100];
671 UChar nfd[100];
672 int normsize = 0;
673 int nfdsize = 0;
674
675 while (size != 19) {
676 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
677 log_verbose("0x%x", data[size]);
678 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
679 norm + normsize, 100 - normsize, &status);
680 if (U_FAILURE(status)) {
681 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data generation - (Are you missing data?)\n");
682 break;
683 }
684 size ++;
685 }
686 log_verbose("\n");
687
688 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
689 nfd, 100, &status);
690 if (U_FAILURE(status)) {
691 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at normalized data generation - (Are you missing data?)\n");
692 }
693
694 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
695 testresult = UNORM_NO;
696 }
697 if (testresult == UNORM_YES) {
698 log_verbose("result UNORM_YES\n");
699 }
700 else {
701 log_verbose("result UNORM_NO\n");
702 }
703
704 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
705 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
706 }
707 }
708 }
709
710 static void
TestAPI()711 TestAPI() {
712 static const UChar in[]={ 0x68, 0xe4 };
713 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
714 UErrorCode errorCode;
715 int32_t length;
716
717 /* try preflighting */
718 errorCode=U_ZERO_ERROR;
719 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
720 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
721 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
722 return;
723 }
724
725 errorCode=U_ZERO_ERROR;
726 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
727 if(U_FAILURE(errorCode)) {
728 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
729 return;
730 }
731 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
732 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
733 return;
734 }
735 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
736 if(U_FAILURE(errorCode)) {
737 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
738 return;
739 }
740 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
741 if(U_FAILURE(errorCode)) {
742 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
743 return;
744 }
745 }
746
/* Code points used by the test cases that improve test code coverage. */
enum {
    /* compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* Hangul syllables */
    HANGUL_AC00          = 0xac00,              /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = 0xac00 + 14*28 + 3,  /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c   /* cc=220 */
};
765
766 static void
TestNormCoverage()767 TestNormCoverage() {
768 UChar input[1000], expect[1000], output[1000];
769 UErrorCode errorCode;
770 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
771
772 /* create a long and nasty string with NFKC-unsafe characters */
773 inLength=0;
774
775 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
776 input[inLength++]=HANGUL_KIYEOK;
777 input[inLength++]=HANGUL_WEO;
778 input[inLength++]=HANGUL_KIYEOK_SIOS;
779
780 input[inLength++]=HANGUL_KIYEOK;
781 input[inLength++]=HANGUL_WEO;
782 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
783
784 input[inLength++]=HANGUL_KIYEOK;
785 input[inLength++]=HANGUL_K_WEO;
786 input[inLength++]=HANGUL_KIYEOK_SIOS;
787
788 input[inLength++]=HANGUL_KIYEOK;
789 input[inLength++]=HANGUL_K_WEO;
790 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
791
792 input[inLength++]=HANGUL_K_KIYEOK;
793 input[inLength++]=HANGUL_WEO;
794 input[inLength++]=HANGUL_KIYEOK_SIOS;
795
796 input[inLength++]=HANGUL_K_KIYEOK;
797 input[inLength++]=HANGUL_WEO;
798 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
799
800 input[inLength++]=HANGUL_K_KIYEOK;
801 input[inLength++]=HANGUL_K_WEO;
802 input[inLength++]=HANGUL_KIYEOK_SIOS;
803
804 input[inLength++]=HANGUL_K_KIYEOK;
805 input[inLength++]=HANGUL_K_WEO;
806 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
807
808 /* Hangul LV with normal/compatibility Jamo T */
809 input[inLength++]=HANGUL_AC00;
810 input[inLength++]=HANGUL_KIYEOK_SIOS;
811
812 input[inLength++]=HANGUL_AC00;
813 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
814
815 /* compatibility Jamo L, V */
816 input[inLength++]=HANGUL_K_KIYEOK;
817 input[inLength++]=HANGUL_K_WEO;
818
819 hangulPrefixLength=inLength;
820
821 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
822 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
823 for(i=0; i<200; ++i) {
824 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
825 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
826 input[inLength++]=U16_LEAD(MUSICAL_STEM);
827 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
828 }
829
830 /* (compatibility) Jamo L, T do not compose */
831 input[inLength++]=HANGUL_K_KIYEOK;
832 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
833
834 /* quick checks */
835 errorCode=U_ZERO_ERROR;
836 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
837 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
838 }
839 errorCode=U_ZERO_ERROR;
840 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
841 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
842 }
843 errorCode=U_ZERO_ERROR;
844 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
845 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
846 }
847 errorCode=U_ZERO_ERROR;
848 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
849 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
850 }
851 errorCode=U_ZERO_ERROR;
852 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
853 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
854 }
855
856 /* NFKC */
857 expectLength=0;
858 expect[expectLength++]=HANGUL_SYLLABLE;
859
860 expect[expectLength++]=HANGUL_SYLLABLE;
861
862 expect[expectLength++]=HANGUL_SYLLABLE;
863
864 expect[expectLength++]=HANGUL_SYLLABLE;
865
866 expect[expectLength++]=HANGUL_SYLLABLE;
867
868 expect[expectLength++]=HANGUL_SYLLABLE;
869
870 expect[expectLength++]=HANGUL_SYLLABLE;
871
872 expect[expectLength++]=HANGUL_SYLLABLE;
873
874 expect[expectLength++]=HANGUL_AC00+3;
875
876 expect[expectLength++]=HANGUL_AC00+3;
877
878 expect[expectLength++]=HANGUL_AC00+14*28;
879
880 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
881 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
882 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
883 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
884 for(i=0; i<200; ++i) {
885 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
886 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
887 }
888 for(i=0; i<200; ++i) {
889 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
890 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
891 }
892
893 expect[expectLength++]=HANGUL_KIYEOK;
894 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
895
896 /* try destination overflow first */
897 errorCode=U_ZERO_ERROR;
898 preflightLength=unorm_normalize(input, inLength,
899 UNORM_NFKC, 0,
900 output, 100, /* too short */
901 &errorCode);
902 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
903 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
904 }
905
906 /* real NFKC */
907 errorCode=U_ZERO_ERROR;
908 length=unorm_normalize(input, inLength,
909 UNORM_NFKC, 0,
910 output, UPRV_LENGTHOF(output),
911 &errorCode);
912 if(U_FAILURE(errorCode)) {
913 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
914 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
915 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
916 for(i=0; i<length; ++i) {
917 if(output[i]!=expect[i]) {
918 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
919 break;
920 }
921 }
922 }
923 if(length!=preflightLength) {
924 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
925 }
926
927 /* FCD */
928 u_memcpy(expect, input, hangulPrefixLength);
929 expectLength=hangulPrefixLength;
930
931 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
932 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
933 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
934 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
935 for(i=0; i<200; ++i) {
936 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
937 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
938 }
939 for(i=0; i<200; ++i) {
940 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
941 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
942 }
943
944 expect[expectLength++]=HANGUL_K_KIYEOK;
945 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
946
947 errorCode=U_ZERO_ERROR;
948 length=unorm_normalize(input, inLength,
949 UNORM_FCD, 0,
950 output, UPRV_LENGTHOF(output),
951 &errorCode);
952 if(U_FAILURE(errorCode)) {
953 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
954 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
955 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
956 for(i=0; i<length; ++i) {
957 if(output[i]!=expect[i]) {
958 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
959 break;
960 }
961 }
962 }
963 }
964
965 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
966 static void
TestConcatenate(void)967 TestConcatenate(void) {
968 /* "re + 'sume'" */
969 static const UChar
970 left[]={
971 0x72, 0x65, 0
972 },
973 right[]={
974 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
975 },
976 expect[]={
977 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
978 };
979
980 UChar buffer[100];
981 UErrorCode errorCode;
982 int32_t length;
983
984 /* left with length, right NUL-terminated */
985 errorCode=U_ZERO_ERROR;
986 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
987 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
988 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
989 }
990
991 /* preflighting */
992 errorCode=U_ZERO_ERROR;
993 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
994 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
995 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
996 }
997
998 buffer[2]=0x5555;
999 errorCode=U_ZERO_ERROR;
1000 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1001 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1002 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1003 }
1004
1005 /* enter with U_FAILURE */
1006 buffer[2]=0xaaaa;
1007 errorCode=U_UNEXPECTED_TOKEN;
1008 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1009 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1010 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1011 }
1012
1013 /* illegal arguments */
1014 buffer[2]=0xaaaa;
1015 errorCode=U_ZERO_ERROR;
1016 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1017 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1018 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1019 }
1020
1021 errorCode=U_ZERO_ERROR;
1022 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1023 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1024 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1025 }
1026 }
1027
/*
 * Separator used inside the expected-output arrays of the iteration tests:
 * 0x2b is '+', and each run of code units between two separators is one
 * expected output piece.
 */
enum {
    _PLUS=0x2b
};
1031
/*
 * Printable names for log messages, indexed by a UNormalizationMode value
 * (see _modeString[mode] in _testIter()).
 * NOTE(review): the entry order presumably mirrors the UNormalizationMode
 * enum in unicode/unorm.h, with "0" as a placeholder for value 0 - confirm.
 */
static const char *const _modeString[UNORM_MODE_COUNT]={
    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
};
1035
/*
 * Steps through iter with unorm_next() (forward) or unorm_previous()
 * (backward) until the iterator reports no more text, verifying each step.
 *
 * src, srcLength   source text; slices of it are the expected output when
 *                  out is NULL
 * iter             iterator to drive; this function does not reposition it
 *                  before the first step
 * mode             normalization mode passed to unorm_next()/unorm_previous()
 * forward          nonzero: use unorm_next(); zero: use unorm_previous()
 * out, outLength   expected output pieces separated by _PLUS, or NULL to
 *                  iterate with the doNormalize argument off
 * srcIndexes, srcIndexesLength
 *                  expected iterator index at each piece boundary
 *
 * For every step the error code, the resulting iterator index, the output
 * length and the output contents are checked; the first problem is logged
 * and ends the function.
 */
static void
_testIter(const UChar *src, int32_t srcLength,
          UCharIterator *iter, UNormalizationMode mode, UBool forward,
          const UChar *out, int32_t outLength,
          const int32_t *srcIndexes, int32_t srcIndexesLength) {
    UChar buffer[4];
    const UChar *expect, *outLimit, *in;
    int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
    UErrorCode errorCode;
    UBool neededToNormalize, expectNeeded;

    errorCode=U_ZERO_ERROR;
    outLimit=out+outLength;
    if(forward) {
        /* start at the first expected piece and the first boundary */
        expect=out;
        i=index=0;
    } else {
        /* start behind the last expected piece; i addresses the start boundary of the last piece */
        expect=outLimit;
        i=srcIndexesLength-2;
        index=srcLength;
    }

    for(;;) {
        prevIndex=index;
        if(forward) {
            if(!iter->hasNext(iter)) {
                return;
            }
            length=unorm_next(iter,
                              buffer, UPRV_LENGTHOF(buffer),
                              mode, 0,
                              (UBool)(out!=NULL), &neededToNormalize,
                              &errorCode);
            /* the source slice consumed by this step is [prevIndex, expectIndex) */
            expectIndex=srcIndexes[i+1];
            in=src+prevIndex;
            inLength=expectIndex-prevIndex;

            if(out!=NULL) {
                /* get output piece from between plus signs */
                expectLength=0;
                while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
                    ++expectLength;
                }
                /* normalization was needed if the output differs from the source slice */
                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
            } else {
                /* without normalization the output is expected to equal the source slice */
                expect=in;
                expectLength=inLength;
                expectNeeded=FALSE;
            }
        } else {
            if(!iter->hasPrevious(iter)) {
                return;
            }
            length=unorm_previous(iter,
                                  buffer, UPRV_LENGTHOF(buffer),
                                  mode, 0,
                                  (UBool)(out!=NULL), &neededToNormalize,
                                  &errorCode);
            /* the source slice consumed by this step is [expectIndex, prevIndex) */
            expectIndex=srcIndexes[i];
            in=src+expectIndex;
            inLength=prevIndex-expectIndex;

            if(out!=NULL) {
                /* get output piece from between plus signs */
                expectLength=0;
                while(expect!=out && expect[-1]!=_PLUS) {
                    ++expectLength;
                    --expect;
                }
                /* normalization was needed if the output differs from the source slice */
                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
            } else {
                /* without normalization the output is expected to equal the source slice */
                expect=in;
                expectLength=inLength;
                expectNeeded=FALSE;
            }
        }
        index=iter->getIndex(iter, UITER_CURRENT);

        if(U_FAILURE(errorCode)) {
            log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
                         forward, _modeString[mode], i, u_errorName(errorCode));
            return;
        }
        if(expectIndex!=index) {
            log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
                    forward, _modeString[mode], i, index, expectIndex);
            return;
        }
        if(expectLength!=length) {
            log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
                    forward, _modeString[mode], i, length, expectLength);
            return;
        }
        if(0!=u_memcmp(expect, buffer, length)) {
            log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
                    forward, _modeString[mode], i);
            return;
        }
        /*
         * NOTE(review): a mismatch between neededToNormalize and expectNeeded
         * is detected here but nothing is reported - confirm whether the
         * empty body is intentional.
         */
        if(neededToNormalize!=expectNeeded) {
        }

        if(forward) {
            expect+=expectLength+1; /* go after the + */
            ++i;
        } else {
            --expect; /* go before the + */
            --i;
        }
    }
}
1146
1147 static void
TestNextPrevious()1148 TestNextPrevious() {
1149 static const UChar
1150 src[]={ /* input string */
1151 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1152 },
1153 nfd[]={ /* + separates expected output pieces */
1154 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1155 },
1156 nfkd[]={
1157 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1158 },
1159 nfc[]={
1160 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1161 },
1162 nfkc[]={
1163 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1164 },
1165 fcd[]={
1166 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1167 };
1168
1169 /* expected iterator indexes in the source string for each iteration piece */
1170 static const int32_t
1171 nfdIndexes[]={
1172 0, 1, 2, 5, 6, 7
1173 },
1174 nfkdIndexes[]={
1175 0, 1, 2, 5, 6, 7
1176 },
1177 nfcIndexes[]={
1178 0, 1, 2, 5, 6, 7
1179 },
1180 nfkcIndexes[]={
1181 0, 1, 2, 5, 7
1182 },
1183 fcdIndexes[]={
1184 0, 1, 2, 5, 6, 7
1185 };
1186
1187 UCharIterator iter;
1188
1189 UChar buffer[4];
1190 int32_t length;
1191
1192 UBool neededToNormalize;
1193 UErrorCode errorCode;
1194
1195 uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1196
1197 /* test iteration with doNormalize */
1198 iter.index=0;
1199 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1200 iter.index=0;
1201 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1202 iter.index=0;
1203 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1204 iter.index=0;
1205 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1206 iter.index=0;
1207 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1208
1209 iter.index=iter.length;
1210 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1211 iter.index=iter.length;
1212 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1213 iter.index=iter.length;
1214 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1215 iter.index=iter.length;
1216 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1217 iter.index=iter.length;
1218 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1219
1220 /* test iteration without doNormalize */
1221 iter.index=0;
1222 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1223 iter.index=0;
1224 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1225 iter.index=0;
1226 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1227 iter.index=0;
1228 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1229 iter.index=0;
1230 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1231
1232 iter.index=iter.length;
1233 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1234 iter.index=iter.length;
1235 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1236 iter.index=iter.length;
1237 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1238 iter.index=iter.length;
1239 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1240 iter.index=iter.length;
1241 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1242
1243 /* try without neededToNormalize */
1244 errorCode=U_ZERO_ERROR;
1245 buffer[0]=5;
1246 iter.index=1;
1247 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1248 UNORM_NFD, 0, TRUE, NULL,
1249 &errorCode);
1250 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1251 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1252 return;
1253 }
1254
1255 /* preflight */
1256 neededToNormalize=9;
1257 iter.index=1;
1258 length=unorm_next(&iter, NULL, 0,
1259 UNORM_NFD, 0, TRUE, &neededToNormalize,
1260 &errorCode);
1261 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1262 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1263 return;
1264 }
1265
1266 errorCode=U_ZERO_ERROR;
1267 buffer[0]=buffer[1]=5;
1268 neededToNormalize=9;
1269 iter.index=1;
1270 length=unorm_next(&iter, buffer, 1,
1271 UNORM_NFD, 0, TRUE, &neededToNormalize,
1272 &errorCode);
1273 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1274 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1275 return;
1276 }
1277
1278 /* no iterator */
1279 errorCode=U_ZERO_ERROR;
1280 buffer[0]=buffer[1]=5;
1281 neededToNormalize=9;
1282 iter.index=1;
1283 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1284 UNORM_NFD, 0, TRUE, &neededToNormalize,
1285 &errorCode);
1286 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1287 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1288 return;
1289 }
1290
1291 /* illegal mode */
1292 buffer[0]=buffer[1]=5;
1293 neededToNormalize=9;
1294 iter.index=1;
1295 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1296 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1297 &errorCode);
1298 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1299 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1300 return;
1301 }
1302
1303 /* error coming in */
1304 errorCode=U_MISPLACED_QUANTIFIER;
1305 buffer[0]=5;
1306 iter.index=1;
1307 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1308 UNORM_NFD, 0, TRUE, NULL,
1309 &errorCode);
1310 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1311 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1312 return;
1313 }
1314 }
1315
1316 static void
TestFCNFKCClosure(void)1317 TestFCNFKCClosure(void) {
1318 static const struct {
1319 UChar32 c;
1320 const UChar s[6];
1321 } tests[]={
1322 { 0x00C4, { 0 } },
1323 { 0x00E4, { 0 } },
1324 { 0x037A, { 0x0020, 0x03B9, 0 } },
1325 { 0x03D2, { 0x03C5, 0 } },
1326 { 0x20A8, { 0x0072, 0x0073, 0 } },
1327 { 0x210B, { 0x0068, 0 } },
1328 { 0x210C, { 0x0068, 0 } },
1329 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1330 { 0x2122, { 0x0074, 0x006D, 0 } },
1331 { 0x2128, { 0x007A, 0 } },
1332 { 0x1D5DB, { 0x0068, 0 } },
1333 { 0x1D5ED, { 0x007A, 0 } },
1334 { 0x0061, { 0 } }
1335 };
1336
1337 UChar buffer[8];
1338 UErrorCode errorCode;
1339 int32_t i, length;
1340
1341 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1342 errorCode=U_ZERO_ERROR;
1343 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1344 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1345 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1346 }
1347 }
1348
1349 /* error handling */
1350 errorCode=U_ZERO_ERROR;
1351 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1352 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1353 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1354 }
1355
1356 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1357 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1358 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1359 }
1360 }
1361
1362 static void
TestQuickCheckPerCP()1363 TestQuickCheckPerCP() {
1364 UErrorCode errorCode;
1365 UChar32 c, lead, trail;
1366 UChar s[U16_MAX_LENGTH], nfd[16];
1367 int32_t length, lccc1, lccc2, tccc1, tccc2;
1368 int32_t qc1, qc2;
1369
1370 if(
1371 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1372 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1373 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1374 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1375 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1376 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1377 ) {
1378 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1379 }
1380
1381 /*
1382 * compare the quick check property values for some code points
1383 * to the quick check results for checking same-code point strings
1384 */
1385 errorCode=U_ZERO_ERROR;
1386 c=0;
1387 while(c<0x110000) {
1388 length=0;
1389 U16_APPEND_UNSAFE(s, length, c);
1390
1391 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1392 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1393 if(qc1!=qc2) {
1394 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1395 }
1396
1397 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1398 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1399 if(qc1!=qc2) {
1400 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1401 }
1402
1403 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1404 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1405 if(qc1!=qc2) {
1406 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1407 }
1408
1409 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1410 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1411 if(qc1!=qc2) {
1412 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1413 }
1414
1415 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1416 if (U_FAILURE(errorCode)) {
1417 log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1418 break;
1419 }
1420
1421 /* length-length == 0 is used to get around a compiler warning. */
1422 U16_GET(nfd, 0, length-length, length, lead);
1423 U16_GET(nfd, 0, length-1, length, trail);
1424
1425 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1426 lccc2=u_getCombiningClass(lead);
1427 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1428 tccc2=u_getCombiningClass(trail);
1429
1430 if(lccc1!=lccc2) {
1431 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1432 lccc1, lccc2, c);
1433 }
1434 if(tccc1!=tccc2) {
1435 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1436 tccc1, tccc2, c);
1437 }
1438
1439 /* skip some code points */
1440 c=(20*c)/19+1;
1441 }
1442 }
1443
1444 static void
TestComposition(void)1445 TestComposition(void) {
1446 static const struct {
1447 UNormalizationMode mode;
1448 uint32_t options;
1449 UChar input[12];
1450 UChar expect[12];
1451 } cases[]={
1452 /*
1453 * special cases for UAX #15 bug
1454 * see Unicode Corrigendum #5: Normalization Idempotency
1455 * at http://unicode.org/versions/corrigendum5.html
1456 * (was Public Review Issue #29)
1457 */
1458 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1459 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1460 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1461 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1462
1463 /* TODO: add test cases for UNORM_FCC here (j2151) */
1464 };
1465
1466 UChar output[16];
1467 UErrorCode errorCode;
1468 int32_t i, length;
1469
1470 for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1471 errorCode=U_ZERO_ERROR;
1472 length=unorm_normalize(
1473 cases[i].input, -1,
1474 cases[i].mode, cases[i].options,
1475 output, UPRV_LENGTHOF(output),
1476 &errorCode);
1477 if( U_FAILURE(errorCode) ||
1478 length!=u_strlen(cases[i].expect) ||
1479 0!=u_memcmp(output, cases[i].expect, length)
1480 ) {
1481 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1482 }
1483 }
1484 }
1485
1486 static void
TestGetDecomposition()1487 TestGetDecomposition() {
1488 UChar decomp[32];
1489 int32_t length;
1490
1491 UErrorCode errorCode=U_ZERO_ERROR;
1492 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1493 if(U_FAILURE(errorCode)) {
1494 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1495 return;
1496 }
1497
1498 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499 if(U_FAILURE(errorCode) || length>=0) {
1500 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1501 }
1502 errorCode=U_ZERO_ERROR;
1503 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1504 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1505 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1506 }
1507 errorCode=U_ZERO_ERROR;
1508 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1509 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1510 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1511 }
1512 errorCode=U_ZERO_ERROR;
1513 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1514 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1515 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1516 }
1517 errorCode=U_ZERO_ERROR;
1518 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1519 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1520 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1521 }
1522 errorCode=U_ZERO_ERROR;
1523 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1524 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1525 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1526 }
1527 }
1528
1529 static void
TestGetRawDecomposition()1530 TestGetRawDecomposition() {
1531 UChar decomp[32];
1532 int32_t length;
1533
1534 UErrorCode errorCode=U_ZERO_ERROR;
1535 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1536 if(U_FAILURE(errorCode)) {
1537 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1538 return;
1539 }
1540 /*
1541 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1542 * without recursive decomposition.
1543 */
1544
1545 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1546 if(U_FAILURE(errorCode) || length>=0) {
1547 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1548 }
1549 errorCode=U_ZERO_ERROR;
1550 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1551 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1552 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1553 }
1554 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1555 errorCode=U_ZERO_ERROR;
1556 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1557 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1558 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1559 }
1560 /* U+212B ANGSTROM SIGN */
1561 errorCode=U_ZERO_ERROR;
1562 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1563 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1564 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1565 }
1566 errorCode=U_ZERO_ERROR;
1567 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1568 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1569 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1570 }
1571 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1572 errorCode=U_ZERO_ERROR;
1573 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1574 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1575 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1576 }
1577 errorCode=U_ZERO_ERROR;
1578 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1579 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1580 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1581 }
1582 errorCode=U_ZERO_ERROR;
1583 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1584 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1585 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1586 }
1587 errorCode=U_ZERO_ERROR;
1588 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1589 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1590 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1591 }
1592 }
1593
1594 static void
TestAppendRestoreMiddle()1595 TestAppendRestoreMiddle() {
1596 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1597 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1598 /* NFC: C5 is 'A with ring above' */
1599 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1600 int32_t length;
1601 UErrorCode errorCode=U_ZERO_ERROR;
1602 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1603 if(U_FAILURE(errorCode)) {
1604 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1605 return;
1606 }
1607 /*
1608 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1609 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1610 * still fits into a[] but the full result still overflows this capacity.
1611 * (Let it modify the destination buffer before reallocating internally.)
1612 */
1613 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1614 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1615 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1616 return;
1617 }
1618 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1619 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1620 log_err("unorm2_append(overflow) modified the first string\n");
1621 return;
1622 }
1623 errorCode=U_ZERO_ERROR;
1624 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1625 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1626 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1627 return;
1628 }
1629 }
1630
1631 static void
TestGetEasyToUseInstance()1632 TestGetEasyToUseInstance() {
1633 static const UChar in[]={
1634 0xA0, /* -> <noBreak> 0020 */
1635 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1636 };
1637 UChar out[32];
1638 int32_t length;
1639
1640 UErrorCode errorCode=U_ZERO_ERROR;
1641 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1642 if(U_FAILURE(errorCode)) {
1643 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1644 return;
1645 }
1646 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1647 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1648 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1649 (int)length, u_errorName(errorCode));
1650 }
1651
1652 errorCode=U_ZERO_ERROR;
1653 n2=unorm2_getNFDInstance(&errorCode);
1654 if(U_FAILURE(errorCode)) {
1655 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1656 return;
1657 }
1658 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1659 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1660 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1661 (int)length, u_errorName(errorCode));
1662 }
1663
1664 errorCode=U_ZERO_ERROR;
1665 n2=unorm2_getNFKCInstance(&errorCode);
1666 if(U_FAILURE(errorCode)) {
1667 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1668 return;
1669 }
1670 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1671 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1672 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1673 (int)length, u_errorName(errorCode));
1674 }
1675
1676 errorCode=U_ZERO_ERROR;
1677 n2=unorm2_getNFKDInstance(&errorCode);
1678 if(U_FAILURE(errorCode)) {
1679 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1680 return;
1681 }
1682 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1683 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1684 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1685 (int)length, u_errorName(errorCode));
1686 }
1687
1688 errorCode=U_ZERO_ERROR;
1689 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1690 if(U_FAILURE(errorCode)) {
1691 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1692 return;
1693 }
1694 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1695 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1696 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1697 (int)length, u_errorName(errorCode));
1698 }
1699 }
1700
1701 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1702