1 /* Test of conversion of multibyte character to wide character.
2    Copyright (C) 2008-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18 
19 #include <config.h>
20 
21 #include <wchar.h>
22 
23 #include "signature.h"
24 SIGNATURE_CHECK (mbrtowc, size_t, (wchar_t *, char const *, size_t,
25                                    mbstate_t *));
26 
27 #include <locale.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "macros.h"
32 
33 int
main(int argc,char * argv[])34 main (int argc, char *argv[])
35 {
36   mbstate_t state;
37   wchar_t wc;
38   size_t ret;
39 
40   /* configure should already have checked that the locale is supported.  */
41   if (setlocale (LC_ALL, "") == NULL)
42     return 1;
43 
44   /* Test zero-length input.  */
45   {
46     memset (&state, '\0', sizeof (mbstate_t));
47     wc = (wchar_t) 0xBADFACE;
48     ret = mbrtowc (&wc, "x", 0, &state);
49     ASSERT (ret == (size_t)(-2));
50     ASSERT (mbsinit (&state));
51   }
52 
53   /* Test NUL byte input.  */
54   {
55     memset (&state, '\0', sizeof (mbstate_t));
56     wc = (wchar_t) 0xBADFACE;
57     ret = mbrtowc (&wc, "", 1, &state);
58     ASSERT (ret == 0);
59     ASSERT (wc == 0);
60     ASSERT (mbsinit (&state));
61     ret = mbrtowc (NULL, "", 1, &state);
62     ASSERT (ret == 0);
63     ASSERT (mbsinit (&state));
64   }
65 
66   /* Test single-byte input.  */
67   {
68     int c;
69     char buf[1];
70 
71     memset (&state, '\0', sizeof (mbstate_t));
72     for (c = 0; c < 0x100; c++)
73       switch (c)
74         {
75         default:
76           if (! (c && 1 < argc && argv[1][0] == '5'))
77             break;
78           FALLTHROUGH;
79         case '\t': case '\v': case '\f':
80         case ' ': case '!': case '"': case '#': case '%':
81         case '&': case '\'': case '(': case ')': case '*':
82         case '+': case ',': case '-': case '.': case '/':
83         case '0': case '1': case '2': case '3': case '4':
84         case '5': case '6': case '7': case '8': case '9':
85         case ':': case ';': case '<': case '=': case '>':
86         case '?':
87         case 'A': case 'B': case 'C': case 'D': case 'E':
88         case 'F': case 'G': case 'H': case 'I': case 'J':
89         case 'K': case 'L': case 'M': case 'N': case 'O':
90         case 'P': case 'Q': case 'R': case 'S': case 'T':
91         case 'U': case 'V': case 'W': case 'X': case 'Y':
92         case 'Z':
93         case '[': case '\\': case ']': case '^': case '_':
94         case 'a': case 'b': case 'c': case 'd': case 'e':
95         case 'f': case 'g': case 'h': case 'i': case 'j':
96         case 'k': case 'l': case 'm': case 'n': case 'o':
97         case 'p': case 'q': case 'r': case 's': case 't':
98         case 'u': case 'v': case 'w': case 'x': case 'y':
99         case 'z': case '{': case '|': case '}': case '~':
100           /* c is in the ISO C "basic character set", or argv[1] starts
101              with '5' so we are testing all nonnull bytes.  */
102           buf[0] = c;
103           wc = (wchar_t) 0xBADFACE;
104           ret = mbrtowc (&wc, buf, 1, &state);
105           ASSERT (ret == 1);
106           if (c < 0x80)
107             /* c is an ASCII character.  */
108             ASSERT (wc == c);
109           else
110             /* argv[1] starts with '5', that is, we are testing the C or POSIX
111                locale.
112                On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
113                But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
114             ASSERT (wc == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
115           ASSERT (mbsinit (&state));
116           ret = mbrtowc (NULL, buf, 1, &state);
117           ASSERT (ret == 1);
118           ASSERT (mbsinit (&state));
119           break;
120         }
121   }
122 
123   /* Test special calling convention, passing a NULL pointer.  */
124   {
125     memset (&state, '\0', sizeof (mbstate_t));
126     wc = (wchar_t) 0xBADFACE;
127     ret = mbrtowc (&wc, NULL, 5, &state);
128     ASSERT (ret == 0);
129     ASSERT (wc == (wchar_t) 0xBADFACE);
130     ASSERT (mbsinit (&state));
131   }
132 
133   if (argc > 1)
134     switch (argv[1][0])
135       {
136       case '1':
137         /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
138         {
139           char input[] = "B\374\337er"; /* "Büßer" */
140           memset (&state, '\0', sizeof (mbstate_t));
141 
142           wc = (wchar_t) 0xBADFACE;
143           ret = mbrtowc (&wc, input, 1, &state);
144           ASSERT (ret == 1);
145           ASSERT (wc == 'B');
146           ASSERT (mbsinit (&state));
147           input[0] = '\0';
148 
149           wc = (wchar_t) 0xBADFACE;
150           ret = mbrtowc (&wc, input + 1, 1, &state);
151           ASSERT (ret == 1);
152           ASSERT (wctob (wc) == (unsigned char) '\374');
153           ASSERT (mbsinit (&state));
154           input[1] = '\0';
155 
156           /* Test support of NULL first argument.  */
157           ret = mbrtowc (NULL, input + 2, 3, &state);
158           ASSERT (ret == 1);
159           ASSERT (mbsinit (&state));
160 
161           wc = (wchar_t) 0xBADFACE;
162           ret = mbrtowc (&wc, input + 2, 3, &state);
163           ASSERT (ret == 1);
164           ASSERT (wctob (wc) == (unsigned char) '\337');
165           ASSERT (mbsinit (&state));
166           input[2] = '\0';
167 
168           wc = (wchar_t) 0xBADFACE;
169           ret = mbrtowc (&wc, input + 3, 2, &state);
170           ASSERT (ret == 1);
171           ASSERT (wc == 'e');
172           ASSERT (mbsinit (&state));
173           input[3] = '\0';
174 
175           wc = (wchar_t) 0xBADFACE;
176           ret = mbrtowc (&wc, input + 4, 1, &state);
177           ASSERT (ret == 1);
178           ASSERT (wc == 'r');
179           ASSERT (mbsinit (&state));
180         }
181         return 0;
182 
183       case '2':
184         /* Locale encoding is UTF-8.  */
185         {
186           char input[] = "B\303\274\303\237er"; /* "Büßer" */
187           memset (&state, '\0', sizeof (mbstate_t));
188 
189           wc = (wchar_t) 0xBADFACE;
190           ret = mbrtowc (&wc, input, 1, &state);
191           ASSERT (ret == 1);
192           ASSERT (wc == 'B');
193           ASSERT (mbsinit (&state));
194           input[0] = '\0';
195 
196           wc = (wchar_t) 0xBADFACE;
197           ret = mbrtowc (&wc, input + 1, 1, &state);
198           ASSERT (ret == (size_t)(-2));
199           ASSERT (wc == (wchar_t) 0xBADFACE);
200           ASSERT (!mbsinit (&state));
201           input[1] = '\0';
202 
203           wc = (wchar_t) 0xBADFACE;
204           ret = mbrtowc (&wc, input + 2, 5, &state);
205           ASSERT (ret == 1);
206           ASSERT (wctob (wc) == EOF);
207           ASSERT (mbsinit (&state));
208           input[2] = '\0';
209 
210           /* Test support of NULL first argument.  */
211           ret = mbrtowc (NULL, input + 3, 4, &state);
212           ASSERT (ret == 2);
213           ASSERT (mbsinit (&state));
214 
215           wc = (wchar_t) 0xBADFACE;
216           ret = mbrtowc (&wc, input + 3, 4, &state);
217           ASSERT (ret == 2);
218           ASSERT (wctob (wc) == EOF);
219           ASSERT (mbsinit (&state));
220           input[3] = '\0';
221           input[4] = '\0';
222 
223           wc = (wchar_t) 0xBADFACE;
224           ret = mbrtowc (&wc, input + 5, 2, &state);
225           ASSERT (ret == 1);
226           ASSERT (wc == 'e');
227           ASSERT (mbsinit (&state));
228           input[5] = '\0';
229 
230           wc = (wchar_t) 0xBADFACE;
231           ret = mbrtowc (&wc, input + 6, 1, &state);
232           ASSERT (ret == 1);
233           ASSERT (wc == 'r');
234           ASSERT (mbsinit (&state));
235         }
236         return 0;
237 
238       case '3':
239         /* Locale encoding is EUC-JP.  */
240         {
241           char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
242           memset (&state, '\0', sizeof (mbstate_t));
243 
244           wc = (wchar_t) 0xBADFACE;
245           ret = mbrtowc (&wc, input, 1, &state);
246           ASSERT (ret == 1);
247           ASSERT (wc == '<');
248           ASSERT (mbsinit (&state));
249           input[0] = '\0';
250 
251           wc = (wchar_t) 0xBADFACE;
252           ret = mbrtowc (&wc, input + 1, 2, &state);
253           ASSERT (ret == 2);
254           ASSERT (wctob (wc) == EOF);
255           ASSERT (mbsinit (&state));
256           input[1] = '\0';
257           input[2] = '\0';
258 
259           wc = (wchar_t) 0xBADFACE;
260           ret = mbrtowc (&wc, input + 3, 1, &state);
261           ASSERT (ret == (size_t)(-2));
262           ASSERT (wc == (wchar_t) 0xBADFACE);
263           ASSERT (!mbsinit (&state));
264           input[3] = '\0';
265 
266           wc = (wchar_t) 0xBADFACE;
267           ret = mbrtowc (&wc, input + 4, 4, &state);
268           ASSERT (ret == 1);
269           ASSERT (wctob (wc) == EOF);
270           ASSERT (mbsinit (&state));
271           input[4] = '\0';
272 
273           /* Test support of NULL first argument.  */
274           ret = mbrtowc (NULL, input + 5, 3, &state);
275           ASSERT (ret == 2);
276           ASSERT (mbsinit (&state));
277 
278           wc = (wchar_t) 0xBADFACE;
279           ret = mbrtowc (&wc, input + 5, 3, &state);
280           ASSERT (ret == 2);
281           ASSERT (wctob (wc) == EOF);
282           ASSERT (mbsinit (&state));
283           input[5] = '\0';
284           input[6] = '\0';
285 
286           wc = (wchar_t) 0xBADFACE;
287           ret = mbrtowc (&wc, input + 7, 1, &state);
288           ASSERT (ret == 1);
289           ASSERT (wc == '>');
290           ASSERT (mbsinit (&state));
291         }
292         return 0;
293 
294       case '4':
295         /* Locale encoding is GB18030.  */
296         {
297           char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
298           memset (&state, '\0', sizeof (mbstate_t));
299 
300           wc = (wchar_t) 0xBADFACE;
301           ret = mbrtowc (&wc, input, 1, &state);
302           ASSERT (ret == 1);
303           ASSERT (wc == 'B');
304           ASSERT (mbsinit (&state));
305           input[0] = '\0';
306 
307           wc = (wchar_t) 0xBADFACE;
308           ret = mbrtowc (&wc, input + 1, 1, &state);
309           ASSERT (ret == (size_t)(-2));
310           ASSERT (wc == (wchar_t) 0xBADFACE);
311           ASSERT (!mbsinit (&state));
312           input[1] = '\0';
313 
314           wc = (wchar_t) 0xBADFACE;
315           ret = mbrtowc (&wc, input + 2, 7, &state);
316           ASSERT (ret == 1);
317           ASSERT (wctob (wc) == EOF);
318           ASSERT (mbsinit (&state));
319           input[2] = '\0';
320 
321           /* Test support of NULL first argument.  */
322           ret = mbrtowc (NULL, input + 3, 6, &state);
323           ASSERT (ret == 4);
324           ASSERT (mbsinit (&state));
325 
326           wc = (wchar_t) 0xBADFACE;
327           ret = mbrtowc (&wc, input + 3, 6, &state);
328           ASSERT (ret == 4);
329           ASSERT (wctob (wc) == EOF);
330           ASSERT (mbsinit (&state));
331           input[3] = '\0';
332           input[4] = '\0';
333           input[5] = '\0';
334           input[6] = '\0';
335 
336           wc = (wchar_t) 0xBADFACE;
337           ret = mbrtowc (&wc, input + 7, 2, &state);
338           ASSERT (ret == 1);
339           ASSERT (wc == 'e');
340           ASSERT (mbsinit (&state));
341           input[7] = '\0';
342 
343           wc = (wchar_t) 0xBADFACE;
344           ret = mbrtowc (&wc, input + 8, 1, &state);
345           ASSERT (ret == 1);
346           ASSERT (wc == 'r');
347           ASSERT (mbsinit (&state));
348         }
349         return 0;
350 
351       case '5':
352         /* C locale; tested above.  */
353         return 0;
354       }
355 
356   return 1;
357 }
358