1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2001-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File custrtrn.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *        Ram                      String transformations test
15 *********************************************************************************
16 */
17 /****************************************************************************/
18 
19 
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/ures.h"
26 #include "ustr_imp.h"
27 #include "cintltst.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "cwchar.h"
31 
/* Registration entry point for this file; it is the only function declared here without `static`. */
32 void addUCharTransformTest(TestNode** root);
33 
/*
 * Forward declarations of the file-local test cases.  They are declared up
 * front so that addUCharTransformTest() can reference them before their
 * definitions appear further down.
 */
34 static void Test_strToUTF32(void);
35 static void Test_strToUTF32_surrogates(void);
36 static void Test_strFromUTF32(void);
37 static void Test_strFromUTF32_surrogates(void);
38 static void Test_UChar_UTF8_API(void);
39 static void Test_FromUTF8(void);
40 static void Test_FromUTF8Lenient(void);
41 static void Test_UChar_WCHART_API(void);
42 static void Test_widestrs(void);
/* Only registered when file I/O and legacy conversion are both configured in. */
43 static void Test_WCHART_LongString(void);
44 static void Test_strToJavaModifiedUTF8(void);
45 static void Test_strFromJavaModifiedUTF8(void);
46 static void TestNullEmptySource(void);
47 
48 void
addUCharTransformTest(TestNode ** root)49 addUCharTransformTest(TestNode** root)
50 {
51    addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
52    addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
53    addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
54    addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
55    addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
56    addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
57    addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
58    addTest(root, &Test_UChar_WCHART_API,  "custrtrn/Test_UChar_WCHART_API");
59    addTest(root, &Test_widestrs,  "custrtrn/Test_widestrs");
60 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
61    addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
62 #endif
63    addTest(root, &Test_strToJavaModifiedUTF8,  "custrtrn/Test_strToJavaModifiedUTF8");
64    addTest(root, &Test_strFromJavaModifiedUTF8,  "custrtrn/Test_strFromJavaModifiedUTF8");
65    addTest(root, &TestNullEmptySource,  "custrtrn/TestNullEmptySource");
66 }
67 
/*
 * UTF-32 reference fixture shared by Test_strToUTF32() and Test_strFromUTF32().
 * It holds a run of BMP code points, then a run of supplementary code points
 * (U+2A699..U+2A6D5), then more BMP code points, and ends with an explicit
 * 0x0000 so that it can also be passed as a NUL-terminated string (length -1).
 * src16 below carries the same text in UTF-16; each supplementary code point
 * here corresponds to one surrogate pair there.
 */
68 static const UChar32 src32[]={
69     0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
70     0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
71     0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
72     0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
73     0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
74     0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
75     0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
76     0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
77     0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
78     0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
79     0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
80     0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
81     0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
82     0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
83     0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
84     0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
85     0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
86     0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
87     0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
88     /* test non-BMP code points */
89     0x0002A699,
90     0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
91     0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
92     0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
93     0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
94     0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,
95 
    /* back to BMP code points; the final 0x0000 is the string terminator */
96     0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
97     0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
98     0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
99     0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
100     0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
101 };
102 
/*
 * UTF-16 fixture carrying the same text as src32: the BMP code points appear
 * unchanged, and the supplementary run is written as surrogate pairs that all
 * share the lead surrogate 0xD869.  The array ends with an explicit 0x0000 so
 * that it can also be passed as a NUL-terminated string (length -1).
 */
103 static const UChar src16[] = {
104     0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
105     0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
106     0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
107     0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
108     0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
109     0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
110     0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
111     0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
112     0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
113     0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
114     0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
115     0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
116     0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
117     0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
118     0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
119     0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
120     0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
121     0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
122     0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
123 
124     /* test non-BMP code points */
125     0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
126     0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
127     0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
128     0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
129     0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
130     0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
131     0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
132     0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
133     0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
134     0xD869, 0xDED5,
135 
    /* back to BMP code points; the final 0x0000 is the string terminator */
136     0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
137     0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
138     0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
139     0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
140     0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
141 };
142 
143 
Test_strToUTF32(void)144 static void Test_strToUTF32(void){
145     UErrorCode err = U_ZERO_ERROR;
146     UChar32 u32Target[400];
147     int32_t u32DestLen;
148     int i= 0;
149 
150     /* first with length */
151     u32DestLen = -2;
152     u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
153     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
154         log_err("u_strToUTF32(preflight with length): "
155                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
156                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
157         return;
158     }
159     err = U_ZERO_ERROR;
160     u32DestLen = -2;
161     u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
162     if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
163         log_err("u_strToUTF32(with length): "
164                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
165                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
166         return;
167     }
168     /*for(i=0; i< u32DestLen; i++){
169         printf("0x%08X, ",uTarget[i]);
170         if(i%10==0){
171             printf("\n");
172         }
173     }*/
174     for(i=0; i< UPRV_LENGTHOF(src32); i++){
175         if(u32Target[i] != src32[i]){
176             log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
177         }
178     }
179     if(u32Target[i] != 0){
180         log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
181     }
182 
183     /* now NUL-terminated */
184     u32DestLen = -2;
185     u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
186     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
187         log_err("u_strToUTF32(preflight with NUL-termination): "
188                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
189                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
190         return;
191     }
192     err = U_ZERO_ERROR;
193     u32DestLen = -2;
194     u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
195     if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
196         log_err("u_strToUTF32(with NUL-termination): "
197                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
198                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
199         return;
200     }
201 
202     for(i=0; i< UPRV_LENGTHOF(src32); i++){
203         if(u32Target[i] != src32[i]){
204             log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
205         }
206     }
207 }
208 
209 /* test unpaired surrogates */
Test_strToUTF32_surrogates()210 static void Test_strToUTF32_surrogates() {
211     UErrorCode err = U_ZERO_ERROR;
212     UChar32 u32Target[400];
213     int32_t len16, u32DestLen;
214     int32_t numSubstitutions;
215     int i;
216 
217     static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
218     static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
219     static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
220     static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
221     len16 = UPRV_LENGTHOF(surr16);
222     for(i = 0; i < 4; ++i) {
223         err = U_ZERO_ERROR;
224         u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
225         if(err != U_INVALID_CHAR_FOUND) {
226             log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
227                     (long)i, u_errorName(err));
228             return;
229         }
230 
231         err = U_ZERO_ERROR;
232         u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
233         if(err != U_INVALID_CHAR_FOUND) {
234             log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
235                     (long)i, u_errorName(err));
236             return;
237         }
238 
239         err = U_ZERO_ERROR;
240         u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
241         if(err != U_INVALID_CHAR_FOUND) {
242             log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
243                     (long)i, u_errorName(err));
244             return;
245         }
246 
247         err = U_ZERO_ERROR;
248         u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
249         if(err != U_INVALID_CHAR_FOUND) {
250             log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
251                     (long)i, u_errorName(err));
252             return;
253         }
254     }
255 
256     err = U_ZERO_ERROR;
257     u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
258     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
259         log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
260                 u_errorName(err));
261         return;
262     }
263 
264     err = U_ZERO_ERROR;
265     u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
266     if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
267         log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
268                 u_errorName(err));
269         return;
270     }
271 
272     err = U_ZERO_ERROR;
273     u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
274     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
275         log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
276                 u_errorName(err));
277         return;
278     }
279 
280     err = U_ZERO_ERROR;
281     u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
282     if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
283         log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
284                 u_errorName(err));
285         return;
286     }
287 
288     /* with substitution character */
289     numSubstitutions = -1;
290     err = U_ZERO_ERROR;
291     u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
292     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
293         log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
294                 u_errorName(err));
295         return;
296     }
297 
298     err = U_ZERO_ERROR;
299     u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
300     if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
301         log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
302                 u_errorName(err));
303         return;
304     }
305 
306     err = U_ZERO_ERROR;
307     u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
308     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
309         log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
310                 u_errorName(err));
311         return;
312     }
313 
314     err = U_ZERO_ERROR;
315     u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
316     if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
317         log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
318                 u_errorName(err));
319         return;
320     }
321 }
322 
Test_strFromUTF32(void)323 static void Test_strFromUTF32(void){
324     UErrorCode err = U_ZERO_ERROR;
325     UChar uTarget[400];
326     int32_t uDestLen;
327     int i= 0;
328 
329     /* first with length */
330     uDestLen = -2;
331     u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
332     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
333         log_err("u_strFromUTF32(preflight with length): "
334                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
335                 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
336         return;
337     }
338     err = U_ZERO_ERROR;
339     uDestLen = -2;
340     u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
341     if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
342         log_err("u_strFromUTF32(with length): "
343                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
344                 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
345         return;
346     }
347     /*for(i=0; i< uDestLen; i++){
348         printf("0x%04X, ",uTarget[i]);
349         if(i%10==0){
350             printf("\n");
351         }
352     }*/
353 
354     for(i=0; i< uDestLen; i++){
355         if(uTarget[i] != src16[i]){
356             log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
357         }
358     }
359     if(uTarget[i] != 0){
360         log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
361     }
362 
363     /* now NUL-terminated */
364     uDestLen = -2;
365     u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
366     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
367         log_err("u_strFromUTF32(preflight with NUL-termination): "
368                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
369                 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
370         return;
371     }
372     err = U_ZERO_ERROR;
373     uDestLen = -2;
374     u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
375     if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
376         log_err("u_strFromUTF32(with NUL-termination): "
377                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
378                 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
379         return;
380     }
381 
382     for(i=0; i< uDestLen; i++){
383         if(uTarget[i] != src16[i]){
384             log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
385         }
386     }
387 }
388 
389 /* test surrogate code points */
Test_strFromUTF32_surrogates()390 static void Test_strFromUTF32_surrogates() {
391     UErrorCode err = U_ZERO_ERROR;
392     UChar uTarget[400];
393     int32_t len32, uDestLen;
394     int32_t numSubstitutions;
395     int i;
396 
397     static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
398     static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
399     static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400     static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
401                                             0x5a, 0xd900, 0xdc00, 0x7a, 0 };
402     len32 = UPRV_LENGTHOF(surr32);
403     for(i = 0; i < 6; ++i) {
404         err = U_ZERO_ERROR;
405         u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
406         if(err != U_INVALID_CHAR_FOUND) {
407             log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
408                     (long)i, u_errorName(err));
409             return;
410         }
411 
412         err = U_ZERO_ERROR;
413         u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
414         if(err != U_INVALID_CHAR_FOUND) {
415             log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
416                     (long)i, u_errorName(err));
417             return;
418         }
419 
420         err = U_ZERO_ERROR;
421         u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
422         if(err != U_INVALID_CHAR_FOUND) {
423             log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
424                     (long)i, u_errorName(err));
425             return;
426         }
427 
428         err = U_ZERO_ERROR;
429         u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
430         if(err != U_INVALID_CHAR_FOUND) {
431             log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
432                     (long)i, u_errorName(err));
433             return;
434         }
435     }
436 
437     err = U_ZERO_ERROR;
438     u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
439     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
440         log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
441                 u_errorName(err));
442         return;
443     }
444 
445     err = U_ZERO_ERROR;
446     u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
447     if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
448         log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
449                 u_errorName(err));
450         return;
451     }
452 
453     err = U_ZERO_ERROR;
454     u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
455     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
456         log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
457                 u_errorName(err));
458         return;
459     }
460 
461     err = U_ZERO_ERROR;
462     u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
463     if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
464         log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
465                 u_errorName(err));
466         return;
467     }
468 
469     /* with substitution character */
470     numSubstitutions = -1;
471     err = U_ZERO_ERROR;
472     u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
473     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
474         log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
475                 u_errorName(err));
476         return;
477     }
478 
479     err = U_ZERO_ERROR;
480     u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
481     if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
482         log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
483                 u_errorName(err));
484         return;
485     }
486 
487     err = U_ZERO_ERROR;
488     u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
489     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
490         log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
491                 u_errorName(err));
492         return;
493     }
494 
495     err = U_ZERO_ERROR;
496     u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
497     if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
498         log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
499                 u_errorName(err));
500         return;
501     }
502 }
503 
Test_UChar_UTF8_API(void)504 static void Test_UChar_UTF8_API(void){
505 
506     UErrorCode err = U_ZERO_ERROR;
507     UChar uTemp[1];
508     char u8Temp[1];
509     UChar* uTarget=uTemp;
510     const char* u8Src;
511     int32_t u8SrcLen = 0;
512     int32_t uTargetLength = 0;
513     int32_t uDestLen=0;
514     const UChar* uSrc = src16;
515     int32_t uSrcLen   = sizeof(src16)/2;
516     char* u8Target = u8Temp;
517     int32_t u8TargetLength =0;
518     int32_t u8DestLen =0;
519     UBool failed = FALSE;
520     int i= 0;
521     int32_t numSubstitutions;
522 
523     {
524         /* preflight */
525         u8Temp[0] = 0x12;
526         u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
527         if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
528             err = U_ZERO_ERROR;
529             u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
530             u8TargetLength = u8DestLen;
531 
532             u8Target[u8TargetLength] = (char)0xfe;
533             u8DestLen = -1;
534             u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
535             if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
536                 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
537                 return;
538             }
539 
540         }
541         else {
542             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
543         }
544         failed = FALSE;
545         /*for(i=0; i< u8DestLen; i++){
546             printf("0x%04X, ",u8Target[i]);
547             if(i%10==0){
548                 printf("\n");
549             }
550         }*/
551         /*for(i=0; i< u8DestLen; i++){
552             if(u8Target[i] != src8[i]){
553                 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
554                 failed =TRUE;
555             }
556         }
557         if(failed){
558             log_err("u_strToUTF8() failed \n");
559         }*/
560         u8Src = u8Target;
561         u8SrcLen = u8DestLen;
562 
563         /* preflight */
564         uTemp[0] = 0x1234;
565         u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
566         if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
567             err = U_ZERO_ERROR;
568             uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
569             uTargetLength =  uDestLen;
570 
571             uTarget[uTargetLength] = 0xfff0;
572             uDestLen = -1;
573             u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
574         }
575         else {
576             log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
577         }
578         /*for(i=0; i< uDestLen; i++){
579             printf("0x%04X, ",uTarget[i]);
580             if(i%10==0){
581                 printf("\n");
582             }
583         }*/
584 
585         if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
586             failed = TRUE;
587         }
588         for(i=0; i< uSrcLen; i++){
589             if(uTarget[i] != src16[i]){
590                 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
591                 failed =TRUE;
592             }
593         }
594         if(failed){
595             log_err("error: u_strFromUTF8(after preflighting) failed\n");
596         }
597 
598         free(u8Target);
599         free(uTarget);
600     }
601     {
602         u8SrcLen = -1;
603         uTargetLength = 0;
604         uSrcLen =-1;
605         u8TargetLength=0;
606         failed = FALSE;
607         /* preflight */
608         u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
609         if(err == U_BUFFER_OVERFLOW_ERROR){
610             err = U_ZERO_ERROR;
611             u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
612             u8TargetLength = u8DestLen;
613 
614             u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
615 
616         }
617         else {
618             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
619         }
620         failed = FALSE;
621         /*for(i=0; i< u8DestLen; i++){
622             printf("0x%04X, ",u8Target[i]);
623             if(i%10==0){
624                 printf("\n");
625             }
626         }*/
627         /*for(i=0; i< u8DestLen; i++){
628             if(u8Target[i] != src8[i]){
629                 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
630                 failed =TRUE;
631             }
632         }
633         if(failed){
634             log_err("u_strToUTF8() failed \n");
635         }*/
636         u8Src = u8Target;
637         u8SrcLen = u8DestLen;
638 
639         /* preflight */
640         u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
641         if(err == U_BUFFER_OVERFLOW_ERROR){
642             err = U_ZERO_ERROR;
643             uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
644             uTargetLength =  uDestLen;
645 
646             u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
647         }
648         else {
649             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
650         }
651         /*for(i=0; i< uDestLen; i++){
652             printf("0x%04X, ",uTarget[i]);
653             if(i%10==0){
654                 printf("\n");
655             }
656         }*/
657 
658         for(i=0; i< uSrcLen; i++){
659             if(uTarget[i] != src16[i]){
660                 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
661                 failed =TRUE;
662             }
663         }
664         if(failed){
665             log_err("u_strToUTF8() failed \n");
666         }
667 
668         free(u8Target);
669         free(uTarget);
670     }
671 
672     /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
673     // Since ICU 60, each surrogate byte sequence is treated as 3 single-byte errors.
674     {
675         static const UChar
676             withLead16[]={ 0x1800, 0xd89a, 0x0061 },
677             withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
678             withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0xfffd, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
679             withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0xd900, 0xdc05, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
680         static const uint8_t
681             withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
682             withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
683             withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
684             withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
685         UChar out16[10];
686         char out8[10];
687 
688         if(
689             (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
690             (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
691             (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
692             (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
693         ) {
694             log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
695         }
696 
697         /* test error handling with substitution characters */
698 
699         /* from UTF-8 with length */
700         err=U_ZERO_ERROR;
701         numSubstitutions=-1;
702         out16[0]=0x55aa;
703         uDestLen=0;
704         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
705                              (const char *)withTrail8, (int32_t)uprv_strlen((const char *)withTrail8),
706                              0x50005, &numSubstitutions,
707                              &err);
708         if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
709                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
710                              numSubstitutions!=3) {
711             log_err("error: u_strFromUTF8WithSub(length) failed\n");
712         }
713 
714         /* from UTF-8 with NUL termination */
715         err=U_ZERO_ERROR;
716         numSubstitutions=-1;
717         out16[0]=0x55aa;
718         uDestLen=0;
719         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
720                              (const char *)withTrail8, -1,
721                              0xfffd, &numSubstitutions,
722                              &err);
723         if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
724                              0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
725                              numSubstitutions!=3) {
726             log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
727         }
728 
729         /* preflight from UTF-8 with NUL termination */
730         err=U_ZERO_ERROR;
731         numSubstitutions=-1;
732         out16[0]=0x55aa;
733         uDestLen=0;
734         u_strFromUTF8WithSub(out16, 1, &uDestLen,
735                              (const char *)withTrail8, -1,
736                              0x50005, &numSubstitutions,
737                              &err);
738         if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=3) {
739             log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
740         }
741 
742         /* to UTF-8 with length */
743         err=U_ZERO_ERROR;
744         numSubstitutions=-1;
745         out8[0]=(char)0xf5;
746         u8DestLen=0;
747         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
748                            withTrail16, u_strlen(withTrail16),
749                            0xfffd, &numSubstitutions,
750                            &err);
751         if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
752                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
753                              numSubstitutions!=1) {
754             log_err("error: u_strToUTF8WithSub(length) failed\n");
755         }
756 
757         /* to UTF-8 with NUL termination */
758         err=U_ZERO_ERROR;
759         numSubstitutions=-1;
760         out8[0]=(char)0xf5;
761         u8DestLen=0;
762         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
763                            withTrail16, -1,
764                            0x1a, &numSubstitutions,
765                            &err);
766         if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8Sub1A) ||
767                              0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
768                              numSubstitutions!=1) {
769             log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
770         }
771 
772         /* preflight to UTF-8 with NUL termination */
773         err=U_ZERO_ERROR;
774         numSubstitutions=-1;
775         out8[0]=(char)0xf5;
776         u8DestLen=0;
777         u_strToUTF8WithSub(out8, 1, &u8DestLen,
778                            withTrail16, -1,
779                            0xfffd, &numSubstitutions,
780                            &err);
781         if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
782                                            numSubstitutions!=1) {
783             log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
784         }
785 
786         /* test that numSubstitutions==0 if there are no substitutions */
787 
788         /* from UTF-8 with length (just first 3 bytes which are valid) */
789         err=U_ZERO_ERROR;
790         numSubstitutions=-1;
791         out16[0]=0x55aa;
792         uDestLen=0;
793         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
794                              (const char *)withTrail8, 3,
795                              0x50005, &numSubstitutions,
796                              &err);
797         if(U_FAILURE(err) || uDestLen!=1 ||
798                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
799                              numSubstitutions!=0) {
800             log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
801         }
802 
803         /* to UTF-8 with length (just first UChar which is valid) */
804         err=U_ZERO_ERROR;
805         numSubstitutions=-1;
806         out8[0]=(char)0xf5;
807         u8DestLen=0;
808         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
809                            withTrail16, 1,
810                            0xfffd, &numSubstitutions,
811                            &err);
812         if(U_FAILURE(err) || u8DestLen!=3 ||
813                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
814                              numSubstitutions!=0) {
815             log_err("error: u_strToUTF8WithSub(no subs) failed\n");
816         }
817 
818         /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
819 
820         /* from UTF-8 with length (just first 3 bytes which are valid) */
821         err=U_ZERO_ERROR;
822         numSubstitutions=-1;
823         out16[0]=0x55aa;
824         uDestLen=0;
825         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
826                              (const char *)withTrail8, 3,
827                              U_SENTINEL, &numSubstitutions,
828                              &err);
829         if(U_FAILURE(err) || uDestLen!=1 ||
830                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
831                              numSubstitutions!=0) {
832             log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
833         }
834 
835         /* to UTF-8 with length (just first UChar which is valid) */
836         err=U_ZERO_ERROR;
837         numSubstitutions=-1;
838         out8[0]=(char)0xf5;
839         u8DestLen=0;
840         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
841                            withTrail16, 1,
842                            U_SENTINEL, &numSubstitutions,
843                            &err);
844         if(U_FAILURE(err) || u8DestLen!=3 ||
845                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
846                              numSubstitutions!=0) {
847             log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
848         }
849     }
850     {
851         /*
852          * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
853          * See ticket #10371.
854          */
855         static const char src[1]={ (char)0xf8 };
856         UChar out16[10];
857         err=U_ZERO_ERROR;
858         u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
859         if(err!=U_INVALID_CHAR_FOUND) {
860             log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
861         }
862     }
863 }
864 
865 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
866 static UBool
equalAnyFFFD(const UChar * s,const UChar * t,int32_t length)867 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
868     UChar c1, c2;
869 
870     while(length>0) {
871         c1=*s++;
872         c2=*t++;
873         if(c1!=c2 && c2!=0xfffd) {
874             return FALSE;
875         }
876         --length;
877     }
878     return TRUE;
879 }
880 
881 /* test u_strFromUTF8Lenient() */
882 static void
Test_FromUTF8(void)883 Test_FromUTF8(void) {
884     /*
885      * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
886      */
887     static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
888     UChar dest[64];
889     UChar *destPointer;
890     int32_t destLength;
891     UErrorCode errorCode;
892 
893     /* 3 bytes input, one UChar output (U+095C) */
894     errorCode=U_ZERO_ERROR;
895     destLength=-99;
896     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
897     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
898         log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
899                 (long)destLength, u_errorName(errorCode));
900     }
901 
902     /* 4 bytes input, two UChars output (U+095C U+0000) */
903     errorCode=U_ZERO_ERROR;
904     destLength=-99;
905     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
906     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
907         log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
908                 (long)destLength, u_errorName(errorCode));
909     }
910 
911     /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
912     errorCode=U_ZERO_ERROR;
913     destLength=-99;
914     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
915     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
916         log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
917                 (long)destLength, u_errorName(errorCode));
918     }
919 
920     /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
921     errorCode=U_ZERO_ERROR;
922     dest[0]=dest[1]=99;
923     destLength=-99;
924     destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
925     if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
926         log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
927                 (long)destLength, u_errorName(errorCode));
928     }
929 }
930 
931 /* test u_strFromUTF8Lenient() */
932 static void
Test_FromUTF8Lenient(void)933 Test_FromUTF8Lenient(void) {
934     /*
935      * Multiple input strings, each NUL-terminated.
936      * Terminate with a string starting with 0xff.
937      */
938     static const uint8_t bytes[]={
939         /* well-formed UTF-8 */
940         0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80,  0xf0, 0xa0, 0x80, 0x80,
941         0x62,  0xc3, 0xa0,  0xe0, 0xa0, 0x81,  0xf0, 0xa0, 0x80, 0x81, 0,
942 
943         /* various malformed sequences */
944         0xc3, 0xc3, 0x9f,  0xc3, 0xa0,  0xe0, 0x80, 0x8a,  0xf0, 0x41, 0x42, 0x43, 0,
945 
946         /* truncated input */
947         0xc3, 0,
948         0xe0, 0,
949         0xe0, 0xa0, 0,
950         0xf0, 0,
951         0xf0, 0x90, 0,
952         0xf0, 0x90, 0x80, 0,
953 
954         /* non-ASCII characters in the last few bytes */
955         0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80, 0,
956         0x61,  0xe0, 0xa0, 0x80,  0xc3, 0x9f, 0,
957 
958         /* empty string */
959         0,
960 
961         /* finish */
962         0xff, 0
963     };
964 
965     /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
966     static const UChar uchars[]={
967         0x61, 0xdf, 0x800,  0xd840, 0xdc00,
968         0x62, 0xe0, 0x801,  0xd840, 0xdc01,  0,
969 
970         0xfffd, 0x9f, 0xe0, 0xa,  0xfffd, 0xfffd,  0,
971 
972         0xfffd, 0,
973         0xfffd, 0,
974         0xfffd, 0,
975         0xfffd, 0,
976         0xfffd, 0,
977         0xfffd, 0,
978 
979         0x61, 0xdf, 0x800,  0,
980         0x61, 0x800, 0xdf,  0,
981 
982         0,
983 
984         0
985     };
986 
987     UChar dest[64];
988     const char *pb;
989     const UChar *pu, *pDest;
990     int32_t srcLength, destLength0, destLength;
991     int number;
992     UErrorCode errorCode;
993 
994     /* verify checking for some illegal arguments */
995     dest[0]=0x1234;
996     destLength=-1;
997     errorCode=U_ZERO_ERROR;
998     pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
999     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
1000         log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1001     }
1002 
1003     dest[0]=0x1234;
1004     destLength=-1;
1005     errorCode=U_ZERO_ERROR;
1006     pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1007     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1008         log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1009     }
1010 
1011     dest[0]=0x1234;
1012     destLength=-1;
1013     errorCode=U_MEMORY_ALLOCATION_ERROR;
1014     pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1015     if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1016         log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1017     }
1018 
1019     /* test normal behavior */
1020     number=0; /* string number for log_err() */
1021 
1022     for(pb=(const char *)bytes, pu=uchars;
1023         *pb!=(char)0xff;
1024         pb+=srcLength+1, pu+=destLength0+1, ++number
1025     ) {
1026         srcLength=(int32_t)uprv_strlen(pb);
1027         destLength0=u_strlen(pu);
1028 
1029         /* preflighting with NUL-termination */
1030         dest[0]=0x1234;
1031         destLength=-1;
1032         errorCode=U_ZERO_ERROR;
1033         pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1034         if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1035             pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1036         ) {
1037             log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1038         }
1039 
1040         /* preflighting/some capacity with NUL-termination */
1041         if(srcLength>0) {
1042             dest[destLength0-1]=0x1234;
1043             destLength=-1;
1044             errorCode=U_ZERO_ERROR;
1045             pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1046             if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1047                 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1048             ) {
1049                 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1050             }
1051         }
1052 
1053         /* conversion with NUL-termination, much capacity */
1054         dest[0]=dest[destLength0]=0x1234;
1055         destLength=-1;
1056         errorCode=U_ZERO_ERROR;
1057         pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1058         if (errorCode!=U_ZERO_ERROR ||
1059             pDest!=dest || dest[destLength0]!=0 ||
1060             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1061         ) {
1062             log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1063         }
1064 
1065         /* conversion with NUL-termination, exact capacity */
1066         dest[0]=dest[destLength0]=0x1234;
1067         destLength=-1;
1068         errorCode=U_ZERO_ERROR;
1069         pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1070         if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1071             pDest!=dest || dest[destLength0]!=0x1234 ||
1072             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1073         ) {
1074             log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1075         }
1076 
1077         /* preflighting with length */
1078         dest[0]=0x1234;
1079         destLength=-1;
1080         errorCode=U_ZERO_ERROR;
1081         pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1082         if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1083             pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1084         ) {
1085             log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1086         }
1087 
1088         /* preflighting/some capacity with length */
1089         if(srcLength>0) {
1090             dest[srcLength-1]=0x1234;
1091             destLength=-1;
1092             errorCode=U_ZERO_ERROR;
1093             pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1094             if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1095                 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1096             ) {
1097                 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1098             }
1099         }
1100 
1101         /* conversion with length, much capacity */
1102         dest[0]=dest[destLength0]=0x1234;
1103         destLength=-1;
1104         errorCode=U_ZERO_ERROR;
1105         pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1106         if (errorCode!=U_ZERO_ERROR ||
1107             pDest!=dest || dest[destLength0]!=0 ||
1108             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1109         ) {
1110             log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1111         }
1112 
1113         /* conversion with length, srcLength capacity */
1114         dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1115         destLength=-1;
1116         errorCode=U_ZERO_ERROR;
1117         pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1118         if(srcLength==destLength0) {
1119             if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1120                 pDest!=dest || dest[destLength0]!=0x1234 ||
1121                 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1122             ) {
1123                 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1124             }
1125         } else {
1126             if (errorCode!=U_ZERO_ERROR ||
1127                 pDest!=dest || dest[destLength0]!=0 ||
1128                 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1129             ) {
1130                 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1131             }
1132         }
1133     }
1134 }
1135 
/*
 * ASCII-only test text: four 8-letter runs ("CDEFGHIJ", "KLMNOPQR",
 * "STUVWXYZ", "STUVWXYZ"), each followed by CR LF, then a terminating NUL.
 * 41 code units in total, terminator included.
 */
static const uint16_t src16j[] = {
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A,
    0x000D, 0x000A,
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052,
    0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A,
    0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A,
    0x000D, 0x000A,
    0x0000, /* terminator */
};
/*
 * ASCII-only test text with embedded NULs: eight segments of five code units,
 * each followed by a NUL (48 code units in total, final NUL included).
 */
static const uint16_t src16WithNulls[] = {
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0000, /* "CDEFG"   NUL */
    0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x0000, /* "HIJ\r\n" NUL */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0000, /* "KLMNO"   NUL */
    0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 0x0000, /* "PQR\r\n" NUL */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, /* "STUVW"   NUL */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, /* "XYZ\r\n" NUL */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, /* "STUVW"   NUL */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, /* "XYZ\r\n" NUL */
    /*
     * Test only ASCII. The following non-ASCII data is disabled:
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD,
    0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1,
    0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB,
    0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5,
    0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9,
    0x0054, 0x0000 */

};
Test_UChar_WCHART_API(void)1166 static void Test_UChar_WCHART_API(void){
1167 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1168     UErrorCode err = U_ZERO_ERROR;
1169     const UChar* uSrc = src16j;
1170     int32_t uSrcLen = sizeof(src16j)/2;
1171     wchar_t* wDest = NULL;
1172     int32_t wDestLen = 0;
1173     int32_t reqLen= 0 ;
1174     UBool failed = FALSE;
1175     UChar* uDest = NULL;
1176     int32_t uDestLen = 0;
1177     int i =0;
1178     {
1179         /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1180         if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1181             log_err("u_strFromWCS() should return NULL with a bad argument\n");
1182         }
1183         if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1184             log_err("u_strToWCS() should return NULL with a bad argument\n");
1185         }
1186 
1187         /* NULL source & destination. */
1188         err = U_ZERO_ERROR;
1189         u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1190         if (err != U_STRING_NOT_TERMINATED_WARNING) {
1191             log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1192         }
1193         err = U_ZERO_ERROR;
1194         u_strToWCS(NULL,0,NULL,NULL,0,&err);
1195         if (err != U_STRING_NOT_TERMINATED_WARNING) {
1196             log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1197         }
1198         err = U_ZERO_ERROR;
1199 
1200         /* pre-flight*/
1201         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1202 
1203         if(err == U_BUFFER_OVERFLOW_ERROR){
1204             err=U_ZERO_ERROR;
1205             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1206             wDestLen = reqLen+1;
1207             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1208         }
1209 
1210         /* pre-flight */
1211         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1212 
1213 
1214         if(err == U_BUFFER_OVERFLOW_ERROR){
1215             err =U_ZERO_ERROR;
1216             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1217             uDestLen = reqLen + 1;
1218             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1219         }else if(U_FAILURE(err)){
1220 
1221             log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1222             return;
1223         }
1224 
1225         for(i=0; i< uSrcLen; i++){
1226             if(uDest[i] != src16j[i]){
1227                 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1228                 failed =TRUE;
1229             }
1230         }
1231 
1232         if(U_FAILURE(err)){
1233             failed = TRUE;
1234         }
1235         if(failed){
1236             log_err("u_strToWCS() failed \n");
1237         }
1238         free(wDest);
1239         free(uDest);
1240 
1241 
1242         /* test with embedded nulls */
1243         uSrc = src16WithNulls;
1244         uSrcLen = sizeof(src16WithNulls)/2;
1245         wDestLen =0;
1246         uDestLen =0;
1247         wDest = NULL;
1248         uDest = NULL;
1249         /* pre-flight*/
1250         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1251 
1252         if(err == U_BUFFER_OVERFLOW_ERROR){
1253             err=U_ZERO_ERROR;
1254             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1255             wDestLen = reqLen+1;
1256             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1257         }
1258 
1259         /* pre-flight */
1260         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1261 
1262         if(err == U_BUFFER_OVERFLOW_ERROR){
1263             err =U_ZERO_ERROR;
1264             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1265             uDestLen = reqLen + 1;
1266             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1267         }
1268 
1269         if(!U_FAILURE(err)) {
1270          for(i=0; i< uSrcLen; i++){
1271             if(uDest[i] != src16WithNulls[i]){
1272                 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1273                 failed =TRUE;
1274             }
1275          }
1276         }
1277 
1278         if(U_FAILURE(err)){
1279             failed = TRUE;
1280         }
1281         if(failed){
1282             log_err("u_strToWCS() failed \n");
1283         }
1284         free(wDest);
1285         free(uDest);
1286 
1287     }
1288 
1289     {
1290 
1291         uSrc = src16j;
1292         uSrcLen = sizeof(src16j)/2;
1293         wDestLen =0;
1294         uDestLen =0;
1295         wDest = NULL;
1296         uDest = NULL;
1297         wDestLen = 0;
1298         /* pre-flight*/
1299         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1300 
1301         if(err == U_BUFFER_OVERFLOW_ERROR){
1302             err=U_ZERO_ERROR;
1303             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1304             wDestLen = reqLen+1;
1305             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1306         }
1307         uDestLen = 0;
1308         /* pre-flight */
1309         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1310 
1311         if(err == U_BUFFER_OVERFLOW_ERROR){
1312             err =U_ZERO_ERROR;
1313             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1314             uDestLen = reqLen + 1;
1315             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1316         }
1317 
1318 
1319         if(!U_FAILURE(err)) {
1320          for(i=0; i< uSrcLen; i++){
1321             if(uDest[i] != src16j[i]){
1322                 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1323                 failed =TRUE;
1324             }
1325          }
1326         }
1327 
1328         if(U_FAILURE(err)){
1329             failed = TRUE;
1330         }
1331         if(failed){
1332             log_err("u_strToWCS() failed \n");
1333         }
1334         free(wDest);
1335         free(uDest);
1336     }
1337 
1338     /*
1339      * Test u_terminateWChars().
1340      * All u_terminateXYZ() use the same implementation macro;
1341      * we test this function to improve API coverage.
1342      */
1343     {
1344         wchar_t buffer[10];
1345 
1346         err=U_ZERO_ERROR;
1347         buffer[3]=0x20ac;
1348         wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1349         if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1350             log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1351                     u_errorName(err), wDestLen, buffer[3]);
1352         }
1353 
1354         err=U_ZERO_ERROR;
1355         buffer[3]=0x20ac;
1356         wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1357         if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1358             log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1359                     u_errorName(err), wDestLen, buffer[3]);
1360         }
1361 
1362         err=U_STRING_NOT_TERMINATED_WARNING;
1363         buffer[3]=0x20ac;
1364         wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1365         if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1366             log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1367                     u_errorName(err), wDestLen, buffer[3]);
1368         }
1369 
1370         err=U_ZERO_ERROR;
1371         buffer[3]=0x20ac;
1372         wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1373         if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1374             log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1375                     u_errorName(err), wDestLen, buffer[3]);
1376         }
1377     }
1378 #else
1379     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1380 #endif
1381 }
1382 
Test_widestrs()1383 static void Test_widestrs()
1384 {
1385 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1386         wchar_t ws[100];
1387         UChar rts[100];
1388         int32_t wcap = UPRV_LENGTHOF(ws);
1389         int32_t wl;
1390         int32_t rtcap = UPRV_LENGTHOF(rts);
1391         int32_t rtl;
1392         wchar_t *wcs;
1393         UChar *cp;
1394         const char *errname;
1395         UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1396         int32_t ul = UPRV_LENGTHOF(ustr) -1;
1397         char astr[100];
1398 
1399         UErrorCode err;
1400 
1401         err = U_ZERO_ERROR;
1402         wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1403         if (U_FAILURE(err)) {
1404                 errname = u_errorName(err);
1405                 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1406         }
1407         if(ul!=wl){
1408             log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1409         }
1410         err = U_ZERO_ERROR;
1411         wl = (int32_t)uprv_wcslen(wcs);
1412         cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1413         (void)cp;    /* Suppress set but not used warning. */
1414         if (U_FAILURE(err)) {
1415                 errname = u_errorName(err);
1416                 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1417         }
1418         if(wl != rtl){
1419             log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1420         }
1421 #else
1422     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1423 #endif
1424 }
1425 
1426 static void
Test_WCHART_LongString()1427 Test_WCHART_LongString(){
1428 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1429     UErrorCode status = U_ZERO_ERROR;
1430     const char* testdatapath=loadTestData(&status);
1431     UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1432     int32_t strLen =0;
1433     const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1434     const UChar* uSrc = str;
1435     int32_t uSrcLen = strLen;
1436     int32_t wDestLen =0, reqLen=0, i=0;
1437     int32_t uDestLen =0;
1438     wchar_t* wDest = NULL;
1439     UChar* uDest = NULL;
1440     UBool failed = FALSE;
1441 
1442     log_verbose("Loaded string of %d UChars\n", uSrcLen);
1443 
1444     if(U_FAILURE(status)){
1445         log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1446         return;
1447     }
1448 
1449     /* pre-flight*/
1450     u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1451 
1452     if(status == U_BUFFER_OVERFLOW_ERROR){
1453         status=U_ZERO_ERROR;
1454         wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1455         wDestLen = reqLen+1;
1456         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1457         log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1458     }
1459 
1460     {
1461       int j;
1462       for(j=0;j>=0&&j<reqLen;j++) {
1463         if(wDest[j]!=uSrc[j]) {
1464           log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1465           break;
1466         }
1467       }
1468     }
1469 
1470     uDestLen = 0;
1471     /* pre-flight */
1472     u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1473     if(status == U_BUFFER_OVERFLOW_ERROR){
1474         status =U_ZERO_ERROR;
1475         uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1476         u_memset(uDest,0xFFFF,reqLen+1);
1477         uDestLen = reqLen + 1;
1478         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1479         log_verbose("Back to %d UChars\n", reqLen);
1480     }
1481 #if defined(U_WCHAR_IS_UTF16)
1482     log_verbose("U_WCHAR_IS_UTF16\n");
1483 #elif defined(U_WCHAR_IS_UTF32)
1484     log_verbose("U_WCHAR_IS_UTF32\n");
1485 #else
1486     log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1487 #endif
1488 
1489     if(reqLen!=uSrcLen) {
1490         log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1491     }
1492 
1493     for(i=0; i< uSrcLen; i++){
1494         if(uDest[i] != str[i]){
1495             log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1496             failed =TRUE;
1497         }
1498     }
1499 
1500     if(U_FAILURE(status)){
1501         failed = TRUE;
1502     }
1503     if(failed){
1504         log_err("u_strToWCS() failed \n");
1505     }
1506     free(wDest);
1507     free(uDest);
1508     /* close the bundle */
1509     ures_close(theBundle);
1510 #else
1511     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1512 #endif
1513 }
1514 
Test_strToJavaModifiedUTF8()1515 static void Test_strToJavaModifiedUTF8() {
1516     static const UChar src[]={
1517         0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1518         0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1519         0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1520         0xdbff, 0xdfff,
1521         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1522     };
1523     static const uint8_t expected[]={
1524         0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1525         0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1526         0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1527         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1528         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1529         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1530     };
1531     static const UChar shortSrc[]={
1532         0xe01, 0xe1, 0x61
1533     };
1534     static const uint8_t shortExpected[]={
1535         0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1536     };
1537     static const UChar asciiNul[]={
1538         0x61, 0x62, 0x63, 0
1539     };
1540     static const uint8_t asciiNulExpected[]={
1541         0x61, 0x62, 0x63
1542     };
1543     char dest[200];
1544     char *p;
1545     int32_t length, expectedTerminatedLength;
1546     UErrorCode errorCode;
1547 
1548     expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1549                                        (const char *)expected);
1550 
1551     errorCode=U_ZERO_ERROR;
1552     length=-5;
1553     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1554                               src, UPRV_LENGTHOF(src), &errorCode);
1555     if( U_FAILURE(errorCode) || p!=dest ||
1556         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1557         dest[length]!=0
1558     ) {
1559         log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1560     }
1561     memset(dest, 0xff, sizeof(dest));
1562     errorCode=U_ZERO_ERROR;
1563     length=-5;
1564     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1565                               src, UPRV_LENGTHOF(src), &errorCode);
1566     if( U_FAILURE(errorCode) || p!=dest ||
1567         0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1568         dest[UPRV_LENGTHOF(expected)]!=0
1569     ) {
1570         log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1571     }
1572     memset(dest, 0xff, sizeof(dest));
1573     errorCode=U_ZERO_ERROR;
1574     length=-5;
1575     p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1576                               src, UPRV_LENGTHOF(src), &errorCode);
1577     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1578         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1579         dest[length]!=(char)0xff
1580     ) {
1581         log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1582     }
1583     memset(dest, 0xff, sizeof(dest));
1584     errorCode=U_ZERO_ERROR;
1585     length=-5;
1586     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1587     if( U_FAILURE(errorCode) || p!=dest ||
1588         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1589         dest[length]!=0
1590     ) {
1591         log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1592     }
1593     memset(dest, 0xff, sizeof(dest));
1594     errorCode=U_ZERO_ERROR;
1595     length=-5;
1596     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1597     if( U_FAILURE(errorCode) || p!=dest ||
1598         0!=memcmp(dest, expected, expectedTerminatedLength) ||
1599         dest[expectedTerminatedLength]!=0
1600     ) {
1601         log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1602     }
1603     memset(dest, 0xff, sizeof(dest));
1604     errorCode=U_ZERO_ERROR;
1605     length=-5;
1606     p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1607                               src, UPRV_LENGTHOF(src), &errorCode);
1608     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1609         length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1610     ) {
1611         log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1612     }
1613     memset(dest, 0xff, sizeof(dest));
1614     errorCode=U_ZERO_ERROR;
1615     length=-5;
1616     p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1617                               src, UPRV_LENGTHOF(src), &errorCode);
1618     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1619         length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1620     ) {
1621         log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1622     }
1623     memset(dest, 0xff, sizeof(dest));
1624     errorCode=U_ZERO_ERROR;
1625     length=-5;
1626     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1627                               shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1628     if( U_FAILURE(errorCode) || p!=dest ||
1629         length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1630         dest[length]!=0
1631     ) {
1632         log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1633     }
1634     memset(dest, 0xff, sizeof(dest));
1635     errorCode=U_ZERO_ERROR;
1636     length=-5;
1637     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1638                               asciiNul, -1, &errorCode);
1639     if( U_FAILURE(errorCode) || p!=dest ||
1640         length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1641         dest[length]!=0
1642     ) {
1643         log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1644     }
1645     memset(dest, 0xff, sizeof(dest));
1646     errorCode=U_ZERO_ERROR;
1647     length=-5;
1648     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1649                               NULL, 0, &errorCode);
1650     if( U_FAILURE(errorCode) || p!=dest ||
1651         length!=0 || dest[0]!=0
1652     ) {
1653         log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1654     }
1655 
1656     /* illegal arguments */
1657     memset(dest, 0xff, sizeof(dest));
1658     errorCode=U_ZERO_ERROR;
1659     length=-5;
1660     p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1661                               src, UPRV_LENGTHOF(src), &errorCode);
1662     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1663         log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1664     }
1665     memset(dest, 0xff, sizeof(dest));
1666     errorCode=U_ZERO_ERROR;
1667     length=-5;
1668     p=u_strToJavaModifiedUTF8(dest, -1, &length,
1669                               src, UPRV_LENGTHOF(src), &errorCode);
1670     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1671         log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1672     }
1673     memset(dest, 0xff, sizeof(dest));
1674     errorCode=U_ZERO_ERROR;
1675     length=-5;
1676     p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1677                               NULL, UPRV_LENGTHOF(src), &errorCode);
1678     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1679         log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1680     }
1681     memset(dest, 0xff, sizeof(dest));
1682     errorCode=U_ZERO_ERROR;
1683     length=-5;
1684     p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1685                               NULL, -1, &errorCode);
1686     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1687         log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1688     }
1689 }
1690 
Test_strFromJavaModifiedUTF8()1691 static void Test_strFromJavaModifiedUTF8() {
1692     static const uint8_t src[]={
1693         0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1694         0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1695         0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1696         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1697         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1698         0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80,  /* invalid sequences */
1699         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1700         0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad,  /* non-shortest forms are allowed */
1701         0xe0, 0xb8, 0x8e, 0x6f
1702     };
1703     static const UChar expected[]={
1704         0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1705         0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1706         0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1707         0xdbff, 0xdfff,
1708         0xfffd, 0xfffd, 0xfffd, 0xfffd,
1709         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1710         0x6c, 0xed,
1711         0xe0e, 0x6f
1712     };
1713     static const uint8_t shortSrc[]={
1714         0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1715     };
1716     static const UChar shortExpected[]={
1717         0xe01, 0xe1, 0x61
1718     };
1719     static const uint8_t asciiNul[]={
1720         0x61, 0x62, 0x63, 0
1721     };
1722     static const UChar asciiNulExpected[]={
1723         0x61, 0x62, 0x63
1724     };
1725     static const uint8_t invalid[]={
1726         0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1727     };
1728     static const UChar invalidExpectedFFFD[]={
1729         0xfffd, 0xfffd, 0xfffd, 0xfffd
1730     };
1731     static const UChar invalidExpected50000[]={
1732         0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1733     };
1734     UChar dest[200];
1735     UChar *p;
1736     int32_t length, expectedTerminatedLength;
1737     int32_t numSubstitutions;
1738     UErrorCode errorCode;
1739 
1740     expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1741 
1742     errorCode=U_ZERO_ERROR;
1743     length=numSubstitutions=-5;
1744     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1745                                        (const char *)src, UPRV_LENGTHOF(src),
1746                                        0xfffd, &numSubstitutions, &errorCode);
1747     if( U_FAILURE(errorCode) || p!=dest ||
1748         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1749         dest[length]!=0 ||
1750         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1751     ) {
1752         log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1753     }
1754     memset(dest, 0xff, sizeof(dest));
1755     errorCode=U_ZERO_ERROR;
1756     length=numSubstitutions=-5;
1757     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1758                                        (const char *)src, UPRV_LENGTHOF(src),
1759                                        0xfffd, &numSubstitutions, &errorCode);
1760     if( U_FAILURE(errorCode) || p!=dest ||
1761         0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1762         dest[UPRV_LENGTHOF(expected)]!=0 ||
1763         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1764     ) {
1765         log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1766     }
1767     memset(dest, 0xff, sizeof(dest));
1768     errorCode=U_ZERO_ERROR;
1769     length=numSubstitutions=-5;
1770     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1771                                        (const char *)src, UPRV_LENGTHOF(src),
1772                                        0xfffd, NULL, &errorCode);
1773     if( U_FAILURE(errorCode) || p!=dest ||
1774         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1775         dest[length]!=0
1776     ) {
1777         log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1778     }
1779     memset(dest, 0xff, sizeof(dest));
1780     errorCode=U_ZERO_ERROR;
1781     length=numSubstitutions=-5;
1782     p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1783                                        (const char *)src, UPRV_LENGTHOF(src),
1784                                        0xfffd, &numSubstitutions, &errorCode);
1785     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1786         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1787         dest[length]!=0xffff ||
1788         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1789     ) {
1790         log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1791     }
1792     memset(dest, 0xff, sizeof(dest));
1793     errorCode=U_ZERO_ERROR;
1794     length=numSubstitutions=-5;
1795     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1796                                        (const char *)src, -1,
1797                                        0xfffd, &numSubstitutions, &errorCode);
1798     if( U_FAILURE(errorCode) || p!=dest ||
1799         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1800         dest[length]!=0 ||
1801         numSubstitutions!=0
1802     ) {
1803         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1804     }
1805     memset(dest, 0xff, sizeof(dest));
1806     errorCode=U_ZERO_ERROR;
1807     length=numSubstitutions=-5;
1808     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1809                                        (const char *)src, -1,
1810                                        0xfffd, &numSubstitutions, &errorCode);
1811     if( U_FAILURE(errorCode) || p!=dest ||
1812         0!=memcmp(dest, expected, expectedTerminatedLength) ||
1813         dest[expectedTerminatedLength]!=0 ||
1814         numSubstitutions!=0
1815     ) {
1816         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1817     }
1818     memset(dest, 0xff, sizeof(dest));
1819     errorCode=U_ZERO_ERROR;
1820     length=numSubstitutions=-5;
1821     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1822                                        (const char *)src, -1,
1823                                        0xfffd, NULL, &errorCode);
1824     if( U_FAILURE(errorCode) || p!=dest ||
1825         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1826         dest[length]!=0
1827     ) {
1828         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1829     }
1830     memset(dest, 0xff, sizeof(dest));
1831     errorCode=U_ZERO_ERROR;
1832     length=numSubstitutions=-5;
1833     p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1834                                        (const char *)src, UPRV_LENGTHOF(src),
1835                                        0xfffd, &numSubstitutions, &errorCode);
1836     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1837         length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1838     ) {
1839         log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1840     }
1841     memset(dest, 0xff, sizeof(dest));
1842     errorCode=U_ZERO_ERROR;
1843     length=numSubstitutions=-5;
1844     p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1845                                        (const char *)src, UPRV_LENGTHOF(src),
1846                                        0xfffd, &numSubstitutions, &errorCode);
1847     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1848         length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1849     ) {
1850         log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1851     }
1852     memset(dest, 0xff, sizeof(dest));
1853     errorCode=U_ZERO_ERROR;
1854     length=numSubstitutions=-5;
1855     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1856                                        (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1857                                        0xfffd, &numSubstitutions, &errorCode);
1858     if( U_FAILURE(errorCode) || p!=dest ||
1859         length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1860         dest[length]!=0 ||
1861         numSubstitutions!=0
1862     ) {
1863         log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1864     }
1865     memset(dest, 0xff, sizeof(dest));
1866     errorCode=U_ZERO_ERROR;
1867     length=numSubstitutions=-5;
1868     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1869                                        (const char *)asciiNul, -1,
1870                                        0xfffd, &numSubstitutions, &errorCode);
1871     if( U_FAILURE(errorCode) || p!=dest ||
1872         length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1873         dest[length]!=0 ||
1874         numSubstitutions!=0
1875     ) {
1876         log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1877     }
1878     memset(dest, 0xff, sizeof(dest));
1879     errorCode=U_ZERO_ERROR;
1880     length=numSubstitutions=-5;
1881     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1882                                        NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1883     if( U_FAILURE(errorCode) || p!=dest ||
1884         length!=0 || dest[0]!=0 ||
1885         numSubstitutions!=0
1886     ) {
1887         log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1888     }
1889     memset(dest, 0xff, sizeof(dest));
1890     errorCode=U_ZERO_ERROR;
1891     length=numSubstitutions=-5;
1892     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1893                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1894                                        0xfffd, &numSubstitutions, &errorCode);
1895     if( U_FAILURE(errorCode) || p!=dest ||
1896         length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1897         dest[length]!=0 ||
1898         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1899     ) {
1900         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1901     }
1902     memset(dest, 0xff, sizeof(dest));
1903     errorCode=U_ZERO_ERROR;
1904     length=numSubstitutions=-5;
1905     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1906                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1907                                        0x50000, &numSubstitutions, &errorCode);
1908     if( U_FAILURE(errorCode) || p!=dest ||
1909         length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1910         dest[length]!=0 ||
1911         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)  /* not ...50000 */
1912     ) {
1913         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1914     }
1915     memset(dest, 0xff, sizeof(dest));
1916     errorCode=U_ZERO_ERROR;
1917     length=numSubstitutions=-5;
1918     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1919                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1920                                        U_SENTINEL, &numSubstitutions, &errorCode);
1921     if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1922         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1923     }
1924     memset(dest, 0xff, sizeof(dest));
1925     errorCode=U_ZERO_ERROR;
1926     length=numSubstitutions=-5;
1927     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1928                                        (const char *)src, UPRV_LENGTHOF(src),
1929                                        U_SENTINEL, &numSubstitutions, &errorCode);
1930     if( errorCode!=U_INVALID_CHAR_FOUND ||
1931         length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1932         numSubstitutions!=0
1933     ) {
1934         log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1935     }
1936 
1937     /* illegal arguments */
1938     memset(dest, 0xff, sizeof(dest));
1939     errorCode=U_ZERO_ERROR;
1940     length=numSubstitutions=-5;
1941     p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1942                                        (const char *)src, UPRV_LENGTHOF(src),
1943                                        0xfffd, &numSubstitutions, &errorCode);
1944     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1945         log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1946     }
1947     memset(dest, 0xff, sizeof(dest));
1948     errorCode=U_ZERO_ERROR;
1949     length=numSubstitutions=-5;
1950     p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1951                                        (const char *)src, UPRV_LENGTHOF(src),
1952                                        0xfffd, &numSubstitutions, &errorCode);
1953     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1954         log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1955     }
1956     memset(dest, 0xff, sizeof(dest));
1957     errorCode=U_ZERO_ERROR;
1958     length=numSubstitutions=-5;
1959     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1960                                        NULL, UPRV_LENGTHOF(src),
1961                                        0xfffd, &numSubstitutions, &errorCode);
1962     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1963         log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1964     }
1965     memset(dest, 0xff, sizeof(dest));
1966     errorCode=U_ZERO_ERROR;
1967     length=numSubstitutions=-5;
1968     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1969                                        NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1970     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1971         log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1972     }
1973     memset(dest, 0xff, sizeof(dest));
1974     errorCode=U_ZERO_ERROR;
1975     length=numSubstitutions=-5;
1976     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1977                                        (const char *)src, UPRV_LENGTHOF(src),
1978                                        0x110000, &numSubstitutions, &errorCode);
1979     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1980         log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1981     }
1982     memset(dest, 0xff, sizeof(dest));
1983     errorCode=U_ZERO_ERROR;
1984     length=numSubstitutions=-5;
1985     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1986                                        (const char *)src, UPRV_LENGTHOF(src),
1987                                        0xdfff, &numSubstitutions, &errorCode);
1988     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1989         log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1990     }
1991 }
1992 
1993 /* test that string transformation functions permit NULL source pointer when source length==0 */
TestNullEmptySource()1994 static void TestNullEmptySource() {
1995     char dest8[4]={ 3, 3, 3, 3 };
1996     UChar dest16[4]={ 3, 3, 3, 3 };
1997     UChar32 dest32[4]={ 3, 3, 3, 3 };
1998 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1999     wchar_t destW[4]={ 3, 3, 3, 3 };
2000 #endif
2001 
2002     int32_t length;
2003     UErrorCode errorCode;
2004 
2005     /* u_strFromXyz() */
2006 
2007     dest16[0]=3;
2008     length=3;
2009     errorCode=U_ZERO_ERROR;
2010     u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2011     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2012         log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2013     }
2014 
2015     dest16[0]=3;
2016     length=3;
2017     errorCode=U_ZERO_ERROR;
2018     u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2019     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2020         log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2021     }
2022 
2023     dest16[0]=3;
2024     length=3;
2025     errorCode=U_ZERO_ERROR;
2026     u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2027     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2028         log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2029     }
2030 
2031     dest16[0]=3;
2032     length=3;
2033     errorCode=U_ZERO_ERROR;
2034     u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2035     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2036         log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2037     }
2038 
2039     dest16[0]=3;
2040     length=3;
2041     errorCode=U_ZERO_ERROR;
2042     u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2043     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2044         log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2045     }
2046 
2047     dest16[0]=3;
2048     length=3;
2049     errorCode=U_ZERO_ERROR;
2050     u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2051     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2052         log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2053     }
2054 
2055     /* u_strToXyz() */
2056 
2057     dest8[0]=3;
2058     length=3;
2059     errorCode=U_ZERO_ERROR;
2060     u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2061     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2062         log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2063     }
2064 
2065     dest8[0]=3;
2066     length=3;
2067     errorCode=U_ZERO_ERROR;
2068     u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2069     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2070         log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2071     }
2072 
2073     dest32[0]=3;
2074     length=3;
2075     errorCode=U_ZERO_ERROR;
2076     u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2077     if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2078         log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2079     }
2080 
2081     dest32[0]=3;
2082     length=3;
2083     errorCode=U_ZERO_ERROR;
2084     u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2085     if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2086         log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2087     }
2088 
2089     dest8[0]=3;
2090     length=3;
2091     errorCode=U_ZERO_ERROR;
2092     u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2093     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2094         log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2095     }
2096 
2097 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2098 
2099     dest16[0]=3;
2100     length=3;
2101     errorCode=U_ZERO_ERROR;
2102     u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2103     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2104         log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2105     }
2106 
2107     destW[0]=3;
2108     length=3;
2109     errorCode=U_ZERO_ERROR;
2110     u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2111     if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2112         log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2113     }
2114 
2115 #endif
2116 }
2117