1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1998-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*
9 * File test.c
10 *
11 * Modification History:
12 *
13 *   Date          Name        Description
14 *   05/01/2000    Madhu       Creation
15 *******************************************************************************
16 */
17 
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/utf_old.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "cintltst.h"
25 #include <stdio.h>
26 
27 // Obsolete macro from obsolete unicode/utf_old.h, for some old test data.
28 #ifndef UTF_ERROR_VALUE
29 #   define UTF_ERROR_VALUE 0xffff
30 #endif
31 
32 #if !U_HIDE_OBSOLETE_UTF_OLD_H
/**
 * Prints every code unit of a NUL-terminated UTF-16 string to stdout as
 * lowercase hexadecimal, each value followed by one space.
 *
 * @param uchars NUL-terminated string to dump; passed straight to u_strlen().
 */
static void printUChars(const UChar *uchars) {
    /*
     * Measure the string once instead of on every loop test, and use a
     * 32-bit index (the type u_strlen() returns) so that long strings
     * cannot wrap the counter.
     */
    const int32_t length=u_strlen(uchars);
    int32_t i;
    for(i=0; i<length; i++) {
        printf("%x ", uchars[i]);
    }
}
39 #endif
40 
41 static void TestCodeUnitValues(void);
42 static void TestCharLength(void);
43 static void TestGetChar(void);
44 static void TestNextPrevChar(void);
45 static void TestNulTerminated(void);
46 static void TestFwdBack(void);
47 static void TestSetChar(void);
48 static void TestAppendChar(void);
49 static void TestAppend(void);
50 static void TestSurrogate(void);
51 
52 void addUTF16Test(TestNode** root);
53 
54 void
addUTF16Test(TestNode ** root)55 addUTF16Test(TestNode** root)
56 {
57     addTest(root, &TestCodeUnitValues,          "utf16tst/TestCodeUnitValues");
58     addTest(root, &TestCharLength,              "utf16tst/TestCharLength");
59     addTest(root, &TestGetChar,                 "utf16tst/TestGetChar");
60     addTest(root, &TestNextPrevChar,            "utf16tst/TestNextPrevChar");
61     addTest(root, &TestNulTerminated,           "utf16tst/TestNulTerminated");
62     addTest(root, &TestFwdBack,                 "utf16tst/TestFwdBack");
63     addTest(root, &TestSetChar,                 "utf16tst/TestSetChar");
64     addTest(root, &TestAppendChar,              "utf16tst/TestAppendChar");
65     addTest(root, &TestAppend,                  "utf16tst/TestAppend");
66     addTest(root, &TestSurrogate,               "utf16tst/TestSurrogate");
67 }
68 
TestCodeUnitValues()69 static void TestCodeUnitValues()
70 {
71     static uint16_t codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
72 
73     int16_t i;
74     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
75         UChar c=codeunit[i];
76         log_verbose("Testing code unit value of %x\n", c);
77         if(i<4){
78             if(
79 #if !U_HIDE_OBSOLETE_UTF_OLD_H
80                     !UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) ||
81 #endif
82                     !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
83                 log_err("ERROR: %x is a single character\n", c);
84             }
85         }
86         if(i >= 4 && i< 8){
87             if(
88 #if !U_HIDE_OBSOLETE_UTF_OLD_H
89                     !UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) ||
90 #endif
91                     !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){
92                 log_err("ERROR: %x is a first surrogate\n", c);
93             }
94         }
95         if(i >= 8 && i< 12){
96             if(
97 #if !U_HIDE_OBSOLETE_UTF_OLD_H
98                     !UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) ||
99 #endif
100                     !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)) {
101                 log_err("ERROR: %x is a second surrogate\n", c);
102             }
103         }
104     }
105 }
106 
TestCharLength()107 static void TestCharLength()
108 {
109     static uint32_t codepoint[]={
110         1, 0x0061,
111         1, 0xe065,
112         1, 0x20ac,
113         2, 0x20402,
114         2, 0x23456,
115         2, 0x24506,
116         2, 0x20402,
117         2, 0x10402,
118         1, 0xd7ff,
119         1, 0xe000
120     };
121 
122     int16_t i;
123 #if !U_HIDE_OBSOLETE_UTF_OLD_H
124     UBool multiple;
125 #endif
126     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
127         UChar32 c=codepoint[i+1];
128         if(
129 #if !U_HIDE_OBSOLETE_UTF_OLD_H
130                 UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i] ||
131 #endif
132                 U16_LENGTH(c) != (uint16_t)codepoint[i]) {
133           log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
134         }else{
135               log_verbose("The no: of code units for %lx is %d\n",c, U16_LENGTH(c));
136         }
137 #if !U_HIDE_OBSOLETE_UTF_OLD_H
138         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
139         if(UTF16_NEED_MULTIPLE_UCHAR(c) != multiple){
140               log_err("ERROR: UTF16_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
141         }
142 #endif
143     }
144 }
145 
TestGetChar()146 static void TestGetChar()
147 {
148     static UChar input[]={
149     /*  code unit,*/
150         0xdc00,
151         0x20ac,
152         0xd841,
153         0x61,
154         0xd841,
155         0xdc02,
156         0xd842,
157         0xdc06,
158         0,
159         0xd842,
160         0xd7ff,
161         0xdc41,
162         0xe000,
163         0xd800
164     };
165     static UChar32 result[]={
166      /*codepoint-unsafe,  codepoint-safe(not strict)  codepoint-safe(strict)*/
167         (UChar32)0xfca10000, 0xdc00,                  UTF_ERROR_VALUE,
168         0x20ac,           0x20ac,                     0x20ac,
169         0x12861,          0xd841,                     UTF_ERROR_VALUE,
170         0x61,             0x61,                       0x61,
171         0x20402,          0x20402,                    0x20402,
172         0x20402,          0x20402,                    0x20402,
173         0x20806,          0x20806,                    0x20806,
174         0x20806,          0x20806,                    0x20806,
175         0x00,             0x00,                       0x00,
176         0x203ff,          0xd842,                     UTF_ERROR_VALUE,
177         0xd7ff,           0xd7ff,                     0xd7ff,
178         0xfc41,           0xdc41,                     UTF_ERROR_VALUE,
179         0xe000,           0xe000,                     0xe000,
180         0x11734,          0xd800,                     UTF_ERROR_VALUE
181     };
182     uint16_t i=0;
183     UChar32 c, expected;
184     uint16_t offset=0;
185     for(offset=0; offset<UPRV_LENGTHOF(input); offset++) {
186         if(0<offset && offset<UPRV_LENGTHOF(input)-1){
187 #if !U_HIDE_OBSOLETE_UTF_OLD_H
188             UTF16_GET_CHAR_UNSAFE(input, offset, c);
189             if(c != result[i]){
190                 log_err("ERROR: UTF16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
191             }
192 #endif
193             U16_GET_UNSAFE(input, offset, c);
194             if(c != result[i]){
195                 log_err("ERROR: U16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
196             }
197         }
198         expected=result[i+1];
199 #if !U_HIDE_OBSOLETE_UTF_OLD_H
200         UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, FALSE);
201         if(c != expected) {
202             log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
203         }
204 #endif
205         U16_GET(input, 0, offset, UPRV_LENGTHOF(input), c);
206         if(c != expected) {
207             log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
208         }
209 
210         U16_GET_OR_FFFD(input, 0, offset, UPRV_LENGTHOF(input), c);
211         if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
212         if(c != expected) {
213             log_err("ERROR: U16_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
214         }
215 #if !U_HIDE_OBSOLETE_UTF_OLD_H
216         UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, TRUE);
217         if(c != result[i+2]){
218             log_err("ERROR: UTF16_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
219         }
220 #endif
221         i=(uint16_t)(i+3);
222     }
223 }
224 
TestNextPrevChar()225 static void TestNextPrevChar(){
226 
227     static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
228     static UChar32 result[]={
229     /*next_unsafe    next_safe_ns  next_safe_s       prev_unsafe   prev_safe_ns     prev_safe_s*/
230         0x0061,        0x0061,       0x0061,           0x0000,       0x0000,          0x0000,
231         0x10000,       0x10000,      0x10000,          0x120400,     0xdc00,          UTF_ERROR_VALUE,
232         0xdc00,        0xdc00,       UTF_ERROR_VALUE,  0x20441,      0x20441,         0x20441,
233         0x10ffff,      0x10ffff,     0x10ffff,         0xd841,       0xd841,          UTF_ERROR_VALUE,
234         0xdfff,        0xdfff,       UTF_ERROR_VALUE,  0xd7ff,       0xd7ff,          0xd7ff,
235         0x0062,        0x0062,       0x0062,           0xd841,       0xd841,          UTF_ERROR_VALUE,
236         0x1ffff,       0xd841,       UTF_ERROR_VALUE,  0x0062,       0x0062,          0x0062,
237         0xd7ff,        0xd7ff,       0xd7ff,           0x10ffff,     0x10ffff,        0x10ffff,
238         0x20441,       0x20441,      0x20441,          0xdbff,       0xdbff,          UTF_ERROR_VALUE,
239         0xdc41,        0xdc41,       UTF_ERROR_VALUE,  0x10000,      0x10000,         0x10000,
240         0xdc00,        0xdc00,       UTF_ERROR_VALUE,  0xd800,       0xd800,          UTF_ERROR_VALUE,
241         0x0000,        0x0000,       0x0000,           0x0061,       0x0061,          0x0061
242     };
243     static uint16_t movedOffset[]={
244    /*next_unsafe    next_safe_ns  next_safe_s       prev_unsafe   prev_safe_ns     prev_safe_s*/
245         1,            1,           1,                11,           11,               11,
246         3,            3,           3,                9,            10 ,              10,
247         3,            3,           3,                8,            8,                8,
248         5,            5,           4,                8,            8,                8,
249         5,            5,           5,                7,            7,                7,
250         6,            6,           6,                6,            6,                6,
251         8,            7,           7,                5,            5,                5,
252         8,            8,           8,                3,            3,                3,
253         10,           10,          10,               3,            3,                3,
254         10,           10,          10,               1,            1,                1,
255         11,           11,          11,               1,            1,                1,
256         12,           12,          12,               0,            0,                0,
257     };
258 
259 
260     UChar32 c=0x0000, expected;
261     uint16_t i=0;
262     uint16_t offset=0, setOffset=0;
263     for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
264          setOffset=offset;
265 #if !U_HIDE_OBSOLETE_UTF_OLD_H
266          UTF16_NEXT_CHAR_UNSAFE(input, setOffset, c);
267          if(setOffset != movedOffset[i]){
268              log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
269                  offset, movedOffset[i], setOffset);
270          }
271          if(c != result[i]){
272              log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
273          }
274 #endif
275          setOffset=offset;
276          U16_NEXT_UNSAFE(input, setOffset, c);
277          if(setOffset != movedOffset[i]){
278              log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
279                  offset, movedOffset[i], setOffset);
280          }
281          if(c != result[i]){
282              log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
283          }
284         expected=result[i+1];
285 #if !U_HIDE_OBSOLETE_UTF_OLD_H
286          setOffset=offset;
287          UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, FALSE);
288          if(setOffset != movedOffset[i+1]){
289              log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
290                  offset, movedOffset[i+1], setOffset);
291          }
292         if(c != expected) {
293             log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
294         }
295 #endif
296          setOffset=offset;
297          U16_NEXT(input, setOffset, UPRV_LENGTHOF(input), c);
298          if(setOffset != movedOffset[i+1]){
299              log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
300                  offset, movedOffset[i+1], setOffset);
301          }
302         if(c != expected){
303             log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
304         }
305 
306         setOffset=offset;
307         U16_NEXT_OR_FFFD(input, setOffset, UPRV_LENGTHOF(input), c);
308         if(setOffset != movedOffset[i+1]){
309             log_err("ERROR: U16_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
310                 offset, movedOffset[i+1], setOffset);
311         }
312         if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
313         if(c != expected){
314             log_err("ERROR: U16_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
315         }
316 #if !U_HIDE_OBSOLETE_UTF_OLD_H
317          setOffset=offset;
318          UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, TRUE);
319          if(setOffset != movedOffset[i+1]){
320              log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
321                  offset, movedOffset[i+2], setOffset);
322          }
323          if(c != result[i+2]){
324              log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
325          }
326 #endif
327          i=(uint16_t)(i+6);
328     }
329     i=0;
330     for(offset=(uint16_t)UPRV_LENGTHOF(input); offset > 0; --offset){
331          setOffset=offset;
332 #if !U_HIDE_OBSOLETE_UTF_OLD_H
333          UTF16_PREV_CHAR_UNSAFE(input, setOffset, c);
334          if(setOffset != movedOffset[i+3]){
335              log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
336                  offset, movedOffset[i+3], setOffset);
337          }
338          if(c != result[i+3]){
339              log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
340          }
341 #endif
342          setOffset=offset;
343          U16_PREV_UNSAFE(input, setOffset, c);
344          if(setOffset != movedOffset[i+3]){
345              log_err("ERROR: U16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
346                  offset, movedOffset[i+3], setOffset);
347          }
348          if(c != result[i+3]){
349              log_err("ERROR: U16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
350          }
351 #if !U_HIDE_OBSOLETE_UTF_OLD_H
352          setOffset=offset;
353          UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
354          if(setOffset != movedOffset[i+4]){
355              log_err("ERROR: UTF16_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
356                  offset, movedOffset[i+4], setOffset);
357          }
358          if(c != result[i+4]){
359              log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
360          }
361 #endif
362          setOffset=offset;
363          U16_PREV(input, 0, setOffset, c);
364          if(setOffset != movedOffset[i+4]){
365              log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
366                  offset, movedOffset[i+4], setOffset);
367          }
368         expected = result[i+4];
369         if(c != expected) {
370             log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
371         }
372 
373         setOffset=offset;
374         U16_PREV_OR_FFFD(input, 0, setOffset, c);
375         if(setOffset != movedOffset[i+4]){
376             log_err("ERROR: U16_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
377                 offset, movedOffset[i+4], setOffset);
378         }
379         if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
380         if(c != expected) {
381             log_err("ERROR: U16_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
382         }
383 #if !U_HIDE_OBSOLETE_UTF_OLD_H
384          setOffset=offset;
385          UTF16_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
386          if(setOffset != movedOffset[i+5]){
387              log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
388                  offset, movedOffset[i+5], setOffset);
389          }
390          if(c != result[i+5]){
391              log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
392          }
393 #endif
394          i=(uint16_t)(i+6);
395     }
396 
397 }
398 
399 /* keep this in sync with utf8tst.c's TestNulTerminated() */
TestNulTerminated()400 static void TestNulTerminated() {
401     static const UChar input[]={
402         /*  0 */  0x61,
403         /*  1 */  0xd801, 0xdc01,
404         /*  3 */  0xdc01,
405         /*  4 */  0x62,
406         /*  5 */  0xd801,
407         /*  6 */  0x00
408         /*  7 */
409     };
410     static const UChar32 result[]={
411         0x61,
412         0x10401,
413         0xdc01,
414         0x62,
415         0xd801,
416         0
417     };
418 
419     UChar32 c, c2, expected;
420     int32_t i0, i=0, j, k, expectedIndex;
421     int32_t cpIndex=0;
422     do {
423         i0=i;
424         U16_NEXT(input, i, -1, c);
425         expected=result[cpIndex];
426         if(c!=expected) {
427             log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
428         }
429         j=i0;
430         U16_NEXT_OR_FFFD(input, j, -1, c);
431         if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
432         if(c!=expected) {
433             log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
434         }
435         if(j!=i) {
436             log_err("U16_NEXT_OR_FFFD() moved to index %d but U16_NEXT() moved to %d\n", j, i);
437         }
438         j=i0;
439         U16_FWD_1(input, j, -1);
440         if(j!=i) {
441             log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", j, i);
442         }
443         ++cpIndex;
444         /*
445          * Move by this many code points from the start.
446          * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
447          */
448         expectedIndex= (c==0) ? i-1 : i;
449         k=0;
450         U16_FWD_N(input, k, -1, cpIndex);
451         if(k!=expectedIndex) {
452             log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
453         }
454     } while(c!=0);
455 
456     i=0;
457     do {
458         j=i0=i;
459         U16_NEXT(input, i, -1, c);
460         do {
461             U16_GET(input, 0, j, -1, c2);
462             if(c2!=c) {
463                 log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j);
464             }
465             U16_GET_OR_FFFD(input, 0, j, -1, c2);
466             expected= U_IS_SURROGATE(c) ? 0xfffd : c;
467             if(c2!=expected) {
468                 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U16_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
469             }
470             /* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
471             k=j+1;
472             U16_SET_CP_LIMIT(input, 0, k, -1);
473             if(k!=i) {
474                 log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
475             }
476         } while(++j<i);
477     } while(c!=0);
478 }
479 
TestFwdBack()480 static void TestFwdBack(){
481     static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
482     static uint16_t fwd_unsafe[] ={1, 3, 5, 6,  8, 10, 11, 12};
483     static uint16_t fwd_safe[]   ={1, 3, 5, 6, 7, 8, 10, 11, 12};
484     static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0};
485     static uint16_t back_safe[]  ={11, 10, 8, 7, 6, 5, 3, 1, 0};
486 
487     static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1};
488     static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11};
489     static uint16_t fwd_N_safe[]   ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */
490     static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3};
491     static uint16_t back_N_safe[]  ={12, 11, 8, 5, 3, 0, 0};
492 
493     uint16_t offunsafe=0, offsafe=0;
494     uint16_t i=0;
495 #if !U_HIDE_OBSOLETE_UTF_OLD_H
496     while(offunsafe < UPRV_LENGTHOF(input)){
497         UTF16_FWD_1_UNSAFE(input, offunsafe);
498         if(offunsafe != fwd_unsafe[i]){
499             log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
500         }
501         i++;
502     }
503 #endif
504     offunsafe=0, offsafe=0;
505     i=0;
506     while(offunsafe < UPRV_LENGTHOF(input)){
507         U16_FWD_1_UNSAFE(input, offunsafe);
508         if(offunsafe != fwd_unsafe[i]){
509             log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
510         }
511         i++;
512     }
513 #if !U_HIDE_OBSOLETE_UTF_OLD_H
514     offunsafe=0, offsafe=0;
515     i=0;
516     while(offsafe < UPRV_LENGTHOF(input)){
517         UTF16_FWD_1_SAFE(input, offsafe, UPRV_LENGTHOF(input));
518         if(offsafe != fwd_safe[i]){
519             log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
520         }
521         i++;
522     }
523 #endif
524     offunsafe=0, offsafe=0;
525     i=0;
526     while(offsafe < UPRV_LENGTHOF(input)){
527         U16_FWD_1(input, offsafe, UPRV_LENGTHOF(input));
528         if(offsafe != fwd_safe[i]){
529             log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
530         }
531         i++;
532     }
533 #if !U_HIDE_OBSOLETE_UTF_OLD_H
534     offunsafe=UPRV_LENGTHOF(input);
535     offsafe=UPRV_LENGTHOF(input);
536     i=0;
537     while(offunsafe > 0){
538         UTF16_BACK_1_UNSAFE(input, offunsafe);
539         if(offunsafe != back_unsafe[i]){
540             log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
541         }
542         i++;
543     }
544 #endif
545     offunsafe=UPRV_LENGTHOF(input);
546     offsafe=UPRV_LENGTHOF(input);
547     i=0;
548     while(offunsafe > 0){
549         U16_BACK_1_UNSAFE(input, offunsafe);
550         if(offunsafe != back_unsafe[i]){
551             log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
552         }
553         i++;
554     }
555 #if !U_HIDE_OBSOLETE_UTF_OLD_H
556     offunsafe=UPRV_LENGTHOF(input);
557     offsafe=UPRV_LENGTHOF(input);
558     i=0;
559     while(offsafe > 0){
560         UTF16_BACK_1_SAFE(input,0,  offsafe);
561         if(offsafe != back_safe[i]){
562             log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
563         }
564         i++;
565     }
566 #endif
567     offunsafe=UPRV_LENGTHOF(input);
568     offsafe=UPRV_LENGTHOF(input);
569     i=0;
570     while(offsafe > 0){
571         U16_BACK_1(input,0,  offsafe);
572         if(offsafe != back_safe[i]){
573             log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
574         }
575         i++;
576     }
577 
578     offunsafe=0;
579     offsafe=0;
580 #if !U_HIDE_OBSOLETE_UTF_OLD_H
581     for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
582         UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
583         if(offunsafe != fwd_N_unsafe[i]){
584             log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
585         }
586     }
587 #endif
588     offunsafe=0;
589     for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
590         U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
591         if(offunsafe != fwd_N_unsafe[i]){
592             log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
593         }
594     }
595 #if !U_HIDE_OBSOLETE_UTF_OLD_H
596     offsafe=0;
597     for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
598         UTF16_FWD_N_SAFE(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
599         if(offsafe != fwd_N_safe[i]){
600             log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
601         }
602     }
603 #endif
604     offsafe=0;
605     for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
606         U16_FWD_N(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
607         if(offsafe != fwd_N_safe[i]){
608             log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
609         }
610     }
611 #if !U_HIDE_OBSOLETE_UTF_OLD_H
612     offunsafe=UPRV_LENGTHOF(input);
613     for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
614         UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
615         if(offunsafe != back_N_unsafe[i]){
616             log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
617         }
618     }
619 #endif
620     offunsafe=UPRV_LENGTHOF(input);
621     for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
622         U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
623         if(offunsafe != back_N_unsafe[i]){
624             log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
625         }
626     }
627 #if !U_HIDE_OBSOLETE_UTF_OLD_H
628     offsafe=UPRV_LENGTHOF(input);
629     for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
630         UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
631         if(offsafe != back_N_safe[i]){
632             log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
633         }
634     }
635 #endif
636     offsafe=UPRV_LENGTHOF(input);
637     for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
638         U16_BACK_N(input, 0, offsafe, Nvalue[i]);
639         if(offsafe != back_N_safe[i]){
640             log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
641         }
642     }
643 }
644 
TestSetChar()645 static void TestSetChar(){
646     static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
647     static uint16_t start_unsafe[]={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 9, 11};
648     static uint16_t start_safe[]  ={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 10, 11};
649     static uint16_t limit_unsafe[]={0, 1, 3, 3, 5, 5, 6, 8, 8, 10, 10, 11};
650     static uint16_t limit_safe[]  ={0, 1, 3, 3, 5, 5, 6, 7, 8, 10, 10, 11};
651 
652     uint16_t i=0;
653     uint16_t offset=0, setOffset=0;
654     for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
655 #if !U_HIDE_OBSOLETE_UTF_OLD_H
656          setOffset=offset;
657          UTF16_SET_CHAR_START_UNSAFE(input, setOffset);
658          if(setOffset != start_unsafe[i]){
659              log_err("ERROR: UTF16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
660          }
661 #endif
662          setOffset=offset;
663          U16_SET_CP_START_UNSAFE(input, setOffset);
664          if(setOffset != start_unsafe[i]){
665              log_err("ERROR: U16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
666          }
667 #if !U_HIDE_OBSOLETE_UTF_OLD_H
668          setOffset=offset;
669          UTF16_SET_CHAR_START_SAFE(input, 0, setOffset);
670          if(setOffset != start_safe[i]){
671              log_err("ERROR: UTF16_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
672          }
673 #endif
674          setOffset=offset;
675          U16_SET_CP_START(input, 0, setOffset);
676          if(setOffset != start_safe[i]){
677              log_err("ERROR: U16_SET_CHAR_START failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
678          }
679 
680          if (offset > 0) {
681 #if !U_HIDE_OBSOLETE_UTF_OLD_H
682              setOffset=offset;
683              UTF16_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
684              if(setOffset != limit_unsafe[i]){
685                  log_err("ERROR: UTF16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
686              }
687 #endif
688              setOffset=offset;
689              U16_SET_CP_LIMIT_UNSAFE(input, setOffset);
690              if(setOffset != limit_unsafe[i]){
691                  log_err("ERROR: U16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
692              }
693          }
694 
695          setOffset=offset;
696          U16_SET_CP_LIMIT(input,0, setOffset, UPRV_LENGTHOF(input));
697          if(setOffset != limit_safe[i]){
698              log_err("ERROR: U16_SET_CHAR_LIMIT failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset);
699          }
700 
701          i++;
702     }
703 }
704 
TestAppendChar()705 static void TestAppendChar(){
706 #if !U_HIDE_OBSOLETE_UTF_OLD_H
707     static UChar s[5]={0x0061, 0x0062, 0x0063, 0x0064, 0x0000};
708     static uint32_t test[]={
709      /*append-position(unsafe),  CHAR to be appended  */
710         0,                        0x20441,
711         2,                        0x0028,
712         2,                        0xdc00,
713         3,                        0xd800,
714         1,                        0x20402,
715 
716     /*append-position(safe),     CHAR to be appended */
717         0,                        0x20441,
718         2,                        0xdc00,
719         3,                        0xd800,
720         1,                        0x20402,
721         3,                        0x20402,
722         3,                        0x10402,
723         2,                        0x10402,
724 
725     };
726     static uint16_t movedOffset[]={
727         /*offset-moved-to(unsafe)*/
728           2,              /*for append-pos: 0 , CHAR 0x20441*/
729           3,
730           3,
731           4,
732           3,
733           /*offse-moved-to(safe)*/
734           2,              /*for append-pos: 0, CHAR  0x20441*/
735           3,
736           4,
737           3,
738           4,
739           4,
740           4
741     };
742 
743     static UChar result[][5]={
744         /*unsafe*/
745         {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
746         {0x0061, 0x0062, 0x0028, 0x0064, 0x0000},
747         {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
748         {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
749         {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
750 
751         /*safe*/
752         {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
753         {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
754         {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
755         {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
756         {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
757         {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
758         {0x0061, 0x0062, 0xd801, 0xdc02, 0x0000},
759 
760 
761     };
762     uint16_t i, count=0;
763     UChar *str=(UChar*)malloc(sizeof(UChar) * (u_strlen(s)+1));
764     uint16_t offset;
765     for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){
766         if(count<5){
767             u_strcpy(str, s);
768             offset=(uint16_t)test[i];
769             UTF16_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
770             if(offset != movedOffset[count]){
771                 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
772                     count, movedOffset[count], offset);
773 
774             }
775             if(u_strcmp(str, result[count]) !=0){
776                 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed for count=%d. Expected:", count);
777                 printUChars(result[count]);
778                 printf("\nGot:");
779                 printUChars(str);
780                 printf("\n");
781             }
782         }else{
783             u_strcpy(str, s);
784             offset=(uint16_t)test[i];
785             UTF16_APPEND_CHAR_SAFE(str, offset, (uint16_t)u_strlen(str), test[i+1]);
786             if(offset != movedOffset[count]){
787                 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
788                     count, movedOffset[count], offset);
789 
790             }
791             if(u_strcmp(str, result[count]) !=0){
792                 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed for count=%d. Expected:", count);
793                 printUChars(result[count]);
794                 printf("\nGot:");
795                 printUChars(str);
796                 printf("\n");
797             }
798         }
799         count++;
800     }
801     free(str);
802 #endif
803 }
804 
TestAppend()805 static void TestAppend() {
806     static const UChar32 codePoints[]={
807         0x61, 0xdf, 0x901, 0x3040,
808         0xac00, 0xd800, 0xdbff, 0xdcde,
809         0xdffd, 0xe000, 0xffff, 0x10000,
810         0x12345, 0xe0021, 0x10ffff, 0x110000,
811         0x234567, 0x7fffffff, -1, -1000,
812         0, 0x400
813     };
814     static const UChar expectUnsafe[]={
815         0x61, 0xdf, 0x901, 0x3040,
816         0xac00, 0xd800, 0xdbff, 0xdcde,
817         0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
818         0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
819         /* none from this line */
820         0, 0x400
821     }, expectSafe[]={
822         0x61, 0xdf, 0x901, 0x3040,
823         0xac00, 0xd800, 0xdbff, 0xdcde,
824         0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
825         0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
826         /* none from this line */
827         0, 0x400
828     };
829 
830     UChar buffer[100];
831     UChar32 c;
832     int32_t i, length;
833     UBool isError, expectIsError, wrongIsError;
834 
835     length=0;
836     for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
837         c=codePoints[i];
838         if(c<0 || 0x10ffff<c) {
839             continue; /* skip non-code points for U16_APPEND_UNSAFE */
840         }
841 
842         U16_APPEND_UNSAFE(buffer, length, c);
843     }
844     if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
845         log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
846     }
847 
848     length=0;
849     wrongIsError=FALSE;
850     for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
851         c=codePoints[i];
852         expectIsError= c<0 || 0x10ffff<c; /* || U_IS_SURROGATE(c); */ /* surrogates in UTF-32 shouldn't be used, but it's okay to pass them around internally. */
853         isError=FALSE;
854 
855         U16_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);
856         wrongIsError|= isError!=expectIsError;
857     }
858     if(wrongIsError) {
859         log_err("U16_APPEND did not set isError correctly\n");
860     }
861     if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
862         log_err("U16_APPEND did not generate the expected output\n");
863     }
864 }
865 
TestSurrogate()866 static void TestSurrogate(){
867     static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd};
868     int i = 0;
869     while (i < 5) {
870         UChar first  = U16_LEAD(s[i]);
871         UChar second = U16_TRAIL(s[i]);
872         /* algorithm from the Unicode consortium */
873         UChar firstresult  = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800);
874         UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00);
875 
876         if (
877 #if !U_HIDE_OBSOLETE_UTF_OLD_H
878                 first != UTF16_LEAD(s[i]) || first != UTF_FIRST_SURROGATE(s[i]) ||
879 #endif
880                 first != firstresult) {
881             log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n",
882                     s[i], firstresult);
883         }
884         if (
885 #if !U_HIDE_OBSOLETE_UTF_OLD_H
886                 second != UTF16_TRAIL(s[i]) || second != UTF_SECOND_SURROGATE(s[i]) ||
887 #endif
888                 second != secondresult) {
889             log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n",
890                     s[i], secondresult);
891         }
892         i ++;
893     }
894 }
895