1 /*
2  * PROJECT:    ReactOS wcstombs Test Suite
3  * LICENSE:    GPL v2 or any later version
4  * FILE:       tests/wcstombs-tests/wcstombs-tests.c
5  * PURPOSE:    Application for testing the CRT API's (wcstombs and wctomb) and the Win32 API WideCharToMultiByte for the Unicode to MultiByte string conversion
6  * COPYRIGHT:  Copyright 2008 Colin Finck <colin@reactos.org>
7  */
8 
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <errno.h>
14 
15 /* Macros for simplification */
/*
 * SETLOCALE(locale) - switch the CRT locale for the tests that follow.
 *
 * Stores the result of setlocale(LC_ALL, locale) in a variable named `loc`,
 * which the calling function has to declare itself (char*).
 * If the locale cannot be set, a notice is printed and the macro RETURNS from
 * the calling function, so it can only be used in functions returning void.
 * `locale` must be a string literal, because it is concatenated into the
 * notice at compile time.
 *
 * The body is wrapped in do { } while(0) so that the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else.
 */
#define SETLOCALE(locale) \
    do \
    { \
        loc = setlocale(LC_ALL, locale); \
        if(!loc) \
        { \
            puts("setlocale failed for " locale ", this locale is probably not installed on your system"); \
            return; \
        } \
    } while(0)
23 
/*
 * OK(condition, fail_message, ...) - report a failed expectation.
 *
 * When `condition` is false, prints the source line number of the check
 * followed by the printf-style `fail_message` and its arguments, terminated
 * by a newline. Nothing is printed when the condition holds.
 * `fail_message` must be a string literal (it is concatenated with the
 * "%d: " prefix at compile time).
 *
 * The body is wrapped in do { } while(0) so that the macro is a single
 * statement: it can be used in an unbraced if/else without capturing a
 * following `else` or leaving a stray empty statement behind.
 */
#define OK(condition, fail_message, ...) \
    do \
    { \
        if(!(condition)) \
            printf("%d: " fail_message "\n", __LINE__, ##__VA_ARGS__); \
    } while(0)
27 
28 /* Global variables for easier handling */
/* Conversion targets and the result of the most recent call, shared by all test functions */
char mbc;       /* receives a single converted character */
char mbs[5];    /* receives a converted string (up to four bytes plus terminator) */
int ret;        /* return value of the function under test */

/* Wide-character inputs */
wchar_t wc1 = 0x00E4;       /* 228: Western Windows-1252 character */
wchar_t wc2 = 0x0440;       /* 1088: Russian Windows-1251 character not displayable for Windows-1252 */

/* String with ASCII characters and a Russian character */
wchar_t wcs[] = {L'T', L'h', 0x0440, L'i', 0};

/* String with Chinese (codepage 950) characters */
wchar_t dbwcs[] = {0x7119, 0x6240, 0};
36 
37 
38 void CRT_Tests()
39 {
40     char* loc;
41 
42     puts("CRT-Tests");
43     puts("---------");
44 
45     /* Current locale is "C", wcstombs should return the length of the input buffer without the terminating null character */
46     ret = wcstombs(NULL, dbwcs, 0);
47     OK(ret == 2, "ret is %d", ret);
48 
49     ret = wcstombs(mbs, dbwcs, ret);
50     OK(ret == -1, "ret is %d", ret);
51     OK(mbs[0] == 0, "mbs[0] is %d", mbs[0]);
52     OK(errno == EILSEQ, "errno is %d", errno);
53 
54     ret = wcstombs(NULL, wcs, 0);
55     OK(ret == 4, "ret is %d", ret);
56 
57     ret = wcstombs(mbs, wcs, ret);
58     OK(ret == -1, "ret is %d", ret);
59     OK(!strcmp(mbs, "Th"), "mbs is %s", mbs);
60     OK(errno == EILSEQ, "errno is %d", errno);
61 
62     ret = wctomb(&mbc, wcs[0]);
63     OK(ret == 1, "ret is %d", ret);
64     OK(mbc == 84, "mbc is %d", mbc);
65 
66     mbc = 84;
67     ret = wcstombs(&mbc, &dbwcs[0], 1);
68     OK(ret == -1, "ret is %d", ret);
69     OK(mbc == 84, "mbc is %d", mbc);
70 
71     ret = wcstombs(mbs, wcs, 0);
72     OK(ret == 0, "ret is %d", ret);
73 
74     /* The length for the null character (in any locale) is 0, but if you pass a variable, it will be set to 0 and wctomb returns 1 */
75     ret = wctomb(NULL, 0);
76     OK(ret == 0, "ret is %d", ret);
77 
78     ret = wctomb(&mbc, 0);
79     OK(ret == 1, "ret is %d", ret);
80     OK(mbc == 0, "mbc is %d", mbc);
81 
82     /* msvcr80.dll and later versions of CRT change mbc in the following call back to 0, msvcrt.dll from WinXP SP2 leaves it untouched */
83     mbc = 84;
84     ret = wctomb(&mbc, dbwcs[0]);
85     OK(ret == -1, "ret is %d", ret);
86     OK(errno == EILSEQ, "errno is %d", errno);
87     OK(mbc == 84, "mbc is %d", mbc);
88 
89     /* With a real locale, -1 also becomes a possible return value in case of an invalid character */
90     SETLOCALE("German");
91     ret = wcstombs(NULL, dbwcs, 0);
92     OK(ret == -1, "ret is %d", ret);
93     OK(errno == EILSEQ, "errno is %d", errno);
94 
95     ret = wcstombs(NULL, wcs, 2);
96     OK(ret == -1, "ret is %d", ret);
97     OK(errno == EILSEQ, "errno is %d", errno);
98 
99     /* Test if explicitly setting the locale back to "C" also leads to the same results as above */
100     SETLOCALE("C");
101 
102     ret = wcstombs(NULL, dbwcs, 0);
103     OK(ret == 2, "ret is %d", ret);
104 
105     ret = wcstombs(NULL, wcs, 0);
106     OK(ret == 4, "ret is %d", ret);
107 
108     /* Test wctomb() as well */
109     SETLOCALE("English");
110 
111     ret = wctomb(&mbc, wc1);
112     OK(ret == 1, "ret is %d", ret);
113     OK(mbc == -28, "mbc is %d", mbc);
114 
115     ret = wctomb(&mbc, wc2);
116     OK(ret == -1, "ret is %d", ret);
117     OK(errno == EILSEQ, "errno is %d", errno);
118     OK(mbc == 63, "mbc is %d", mbc);
119 
120     SETLOCALE("Russian");
121 
122     ret = wcstombs(mbs, wcs, sizeof(mbs));
123     OK(ret == 4, "ret is %d", ret);
124     OK(!strcmp(mbs, "Th�i"), "mbs is %s", mbs);
125 
126     ret = wctomb(&mbc, wc2);
127     OK(ret == 1, "ret is %d", ret);
128     OK(mbc == -16, "mbc is %d", mbc);
129 
130     ret = wctomb(&mbc, wc1);
131     OK(ret == 1, "ret is %d", ret);
132     OK(mbc == 97, "mbc is %d", mbc);
133 
134     SETLOCALE("English");
135 
136     ret = wcstombs(&mbc, wcs, 1);
137     OK(ret == 1, "ret is %d", ret);
138     OK(mbc == 84, "mbc is %d", mbc);
139 
140     ZeroMemory(mbs, sizeof(mbs));
141     ret = wcstombs(mbs, wcs, sizeof(mbs));
142     OK(ret == -1, "ret is %d", ret);
143     OK(errno == EILSEQ, "errno is %d", errno);
144     OK(!strcmp(mbs, "Th?i"), "mbs is %s", mbs);
145     mbs[0] = 0;
146 
147     /* wcstombs mustn't add any null character automatically.
148        So in this case, we should get the same string again, even if we only copied the first three bytes. */
149     ret = wcstombs(mbs, wcs, 3);
150     OK(ret == -1, "ret is %d", ret);
151     OK(errno == EILSEQ, "errno is %d", errno);
152     OK(!strcmp(mbs, "Th?i"), "mbs is %s", mbs);
153     ZeroMemory(mbs, 5);
154 
155     /* Now this shouldn't be the case like above as we zeroed the complete string buffer. */
156     ret = wcstombs(mbs, wcs, 3);
157     OK(ret == -1, "ret is %d", ret);
158     OK(errno == EILSEQ, "errno is %d", errno);
159     OK(!strcmp(mbs, "Th?"), "mbs is %s", mbs);
160 
161     /* Double-byte tests */
162     SETLOCALE("Chinese");
163     ret = wcstombs(mbs, dbwcs, sizeof(mbs));
164     OK(ret == 4, "ret is %d", ret);
165     OK(!strcmp(mbs, "�H��"), "mbs is %s", mbs);
166     ZeroMemory(mbs, 5);
167 
168     /* Length-only tests */
169     SETLOCALE("English");
170     ret = wcstombs(NULL, wcs, 0);
171     OK(ret == -1, "ret is %d", ret);
172     OK(errno == EILSEQ, "errno is %d", errno);
173 
174     SETLOCALE("Chinese");
175     ret = wcstombs(NULL, dbwcs, 0);
176     OK(ret == 4, "ret is %d", ret);
177 
178     /* This call causes an ERROR_INSUFFICIENT_BUFFER in the called WideCharToMultiByte function.
179        For some reason, wcstombs under Windows doesn't reset the last error to the previous value here, so we can check for ERROR_INSUFFICIENT_BUFFER with GetLastError().
180        This could also be seen as an indication that Windows uses WideCharToMultiByte internally for wcstombs. */
181     ret = wcstombs(mbs, dbwcs, 1);
182     OK(ret == 0, "ret is %d", ret);
183     OK(mbs[0] == 0, "mbs[0] is %d", mbs[0]);
184 
185     /* ERROR_INSUFFICIENT_BUFFER is also the result of this call with SBCS characters. WTF?!
186        Anyway this is a Win32 error not related to the CRT, so we leave out this criteria. */
187     ret = wcstombs(mbs, wcs, 1);
188     OK(ret == 1, "ret is %d", ret);
189     OK(mbs[0] == 84, "mbs[0] is %d", mbs[0]);
190 
191     putchar('\n');
192 }
193 
194 void Win32_Tests(LPBOOL bUsedDefaultChar)
195 {
196     SetLastError(0xdeadbeef);
197 
198     puts("Win32-Tests");
199     puts("-----------");
200 
201     ret = WideCharToMultiByte(1252, 0, &wc1, 1, &mbc, 1, NULL, bUsedDefaultChar);
202     OK(ret == 1, "ret is %d", ret);
203     OK(mbc == -28, "mbc is %d", mbc);
204     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
205     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
206 
207     ret = WideCharToMultiByte(1252, 0, &wc2, 1, &mbc, 1, NULL, bUsedDefaultChar);
208     OK(ret == 1, "ret is %d", ret);
209     OK(mbc == 63, "mbc is %d", mbc);
210     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
211     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
212 
213     ret = WideCharToMultiByte(1251, 0, &wc2, 1, &mbc, 1, NULL, bUsedDefaultChar);
214     OK(ret == 1, "ret is %d", ret);
215     OK(mbc == -16, "mbc is %d", mbc);
216     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
217     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
218 
219     ret = WideCharToMultiByte(1251, 0, &wc1, 1, &mbc, 1, NULL, bUsedDefaultChar);
220     OK(ret == 1, "ret is %d", ret);
221     OK(mbc == 97, "mbc is %d", mbc);
222     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
223     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
224 
225     /* The behaviour for this character is different when WC_NO_BEST_FIT_CHARS is used */
226     ret = WideCharToMultiByte(1251, WC_NO_BEST_FIT_CHARS, &wc1, 1, &mbc, 1, NULL, bUsedDefaultChar);
227     OK(ret == 1, "ret is %d", ret);
228     OK(mbc == 63, "mbc is %d", mbc);
229     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
230     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
231 
232     ret = WideCharToMultiByte(1252, 0, dbwcs, -1, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
233     OK(ret == 3, "ret is %d", ret);
234     OK(!strcmp(mbs, "??"), "mbs is %s", mbs);
235     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
236     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
237     ZeroMemory(mbs, 5);
238 
239     ret = WideCharToMultiByte(1252, WC_NO_BEST_FIT_CHARS, dbwcs, -1, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
240     OK(ret == 3, "ret is %d", ret);
241     OK(!strcmp(mbs, "??"), "mbs is %s", mbs);
242     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
243     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
244 
245     /* This call triggers the last Win32 error */
246     ret = WideCharToMultiByte(1252, 0, wcs, -1, &mbc, 1, NULL, bUsedDefaultChar);
247     OK(ret == 0, "ret is %d", ret);
248     OK(mbc == 84, "mbc is %d", mbc);
249     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
250     OK(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "GetLastError() is %lu", GetLastError());
251     SetLastError(0xdeadbeef);
252 
253     ret = WideCharToMultiByte(1252, 0, wcs, -1, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
254     OK(ret == 5, "ret is %d", ret);
255     OK(!strcmp(mbs, "Th?i"), "mbs is %s", mbs);
256     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
257     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
258     mbs[0] = 0;
259 
260     /* WideCharToMultiByte mustn't add any null character automatically.
261        So in this case, we should get the same string again, even if we only copied the first three bytes. */
262     ret = WideCharToMultiByte(1252, 0, wcs, 3, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
263     OK(ret == 3, "ret is %d", ret);
264     OK(!strcmp(mbs, "Th?i"), "mbs is %s", mbs);
265     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
266     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
267     ZeroMemory(mbs, 5);
268 
269     /* Now this shouldn't be the case like above as we zeroed the complete string buffer. */
270     ret = WideCharToMultiByte(1252, 0, wcs, 3, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
271     OK(ret == 3, "ret is %d", ret);
272     OK(!strcmp(mbs, "Th?"), "mbs is %s", mbs);
273     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
274     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
275 
276     /* Chinese codepage tests
277        Swapping the WC_NO_BEST_FIT_CHARS and 0 tests causes bUsedDefaultChar to be set to TRUE in the following test, which quits with ERROR_INSUFFICIENT_BUFFER.
278        But as it isn't documented whether all other variables are undefined if ERROR_INSUFFICIENT_BUFFER is set, we skip this behaviour. */
279     ret = WideCharToMultiByte(950, WC_NO_BEST_FIT_CHARS, &wc1, 1, &mbc, 1, NULL, bUsedDefaultChar);
280     OK(ret == 1, "ret is %d", ret);
281     OK(mbc == 63, "mbc is %d", mbc);
282     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
283     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
284 
285     ret = WideCharToMultiByte(950, 0, &wc1, 1, &mbc, 1, NULL, bUsedDefaultChar);
286     OK(ret == 1, "ret is %d", ret);
287     OK(mbc == 97, "mbc is %d", mbc);
288     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
289     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
290 
291     /* Double-byte tests */
292     ret = WideCharToMultiByte(950, 0, dbwcs, -1, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
293     OK(ret == 5, "ret is %d", ret);
294     OK(!strcmp(mbs, "�H��"), "mbs is %s", mbs);
295     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
296     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
297 
298     ret = WideCharToMultiByte(950, 0, dbwcs, 1, &mbc, 1, NULL, bUsedDefaultChar);
299     OK(ret == 0, "ret is %d", ret);
300     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
301     OK(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "GetLastError() is %lu", GetLastError());
302     SetLastError(0xdeadbeef);
303     ZeroMemory(mbs, 5);
304 
305     ret = WideCharToMultiByte(950, 0, dbwcs, 1, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
306     OK(ret == 2, "ret is %d", ret);
307     OK(!strcmp(mbs, "�H"), "mbs is %s", mbs);
308     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
309     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
310 
311     /* Length-only tests */
312     ret = WideCharToMultiByte(1252, 0, &wc2, 1, NULL, 0, NULL, bUsedDefaultChar);
313     OK(ret == 1, "ret is %d", ret);
314     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
315     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
316 
317     ret = WideCharToMultiByte(1252, 0, wcs, -1, NULL, 0, NULL, bUsedDefaultChar);
318     OK(ret == 5, "ret is %d", ret);
319     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
320     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
321 
322     ret = WideCharToMultiByte(950, 0, dbwcs, 1, NULL, 0, NULL, bUsedDefaultChar);
323     OK(ret == 2, "ret is %d", ret);
324     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
325     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
326 
327     ret = WideCharToMultiByte(950, 0, dbwcs, -1, NULL, 0, NULL, bUsedDefaultChar);
328     OK(ret == 5, "ret is %d", ret);
329     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
330     OK(GetLastError() == 0xdeadbeef, "GetLastError() is %lu", GetLastError());
331 
332     /* Abnormal uses of WideCharToMultiByte */
333     ret = WideCharToMultiByte(1252, 0, NULL, 5, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
334     OK(ret == 0, "ret is %d", ret);
335     if(bUsedDefaultChar) OK(*bUsedDefaultChar == FALSE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
336     OK(GetLastError() == ERROR_INVALID_PARAMETER, "GetLastError() is %lu", GetLastError());
337     SetLastError(0xdeadbeef);
338 
339     ret = WideCharToMultiByte(0, 0, dbwcs, 5, mbs, sizeof(mbs), NULL, bUsedDefaultChar);
340     OK(ret == 5, "ret is %d", ret);
341     OK(!strcmp(mbs, "??"), "mbs is %s", mbs);
342     if(bUsedDefaultChar) OK(*bUsedDefaultChar == TRUE, "bUsedDefaultChar is %d", *bUsedDefaultChar);
343 
344     ret = WideCharToMultiByte(1252, 0, wcs, -1, (LPSTR)wcs, 5, NULL, bUsedDefaultChar);
345     OK(ret == 0, "ret is %d", ret);
346     OK(GetLastError() == ERROR_INVALID_PARAMETER, "GetLastError() is %lu", GetLastError());
347     SetLastError(0xdeadbeef);
348 
349     ret = WideCharToMultiByte(1252, 0, wcs, -1, mbs, -1, NULL, bUsedDefaultChar);
350     OK(ret == 0, "ret is %d", ret);
351     OK(GetLastError() == ERROR_INVALID_PARAMETER, "GetLastError() is %lu", GetLastError());
352     SetLastError(0xdeadbeef);
353 
354     putchar('\n');
355 }
356 
357 int main()
358 {
359     BOOL UsedDefaultChar;
360 
361     CRT_Tests();
362 
363     /* There are two code pathes in WideCharToMultiByte, one when Flags || DefaultChar || UsedDefaultChar is set and one when it's not.
364        Test both here. */
365     Win32_Tests(NULL);
366     Win32_Tests(&UsedDefaultChar);
367 
368     return 0;
369 }
370