1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
20  * file for a list of people on the GLib Team.  See the ChangeLog
21  * files for a list of changes.  These files are distributed with
22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
23  */
24 
25 #undef G_DISABLE_ASSERT
26 #undef G_LOG_DOMAIN
27 
28 #include <locale.h>
29 #include <string.h>
30 
31 #include <glib.h>
32 
33 /* Bug 311337 */
34 static void
test_iconv_state(void)35 test_iconv_state (void)
36 {
37   const gchar *in = "\xf4\xe5\xf8\xe5\xed";
38   const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
39   gchar *out;
40   gsize bytes_read = 0;
41   gsize bytes_written = 0;
42   GError *error = NULL;
43 
44   out = g_convert (in, -1, "UTF-8", "CP1255",
45 		   &bytes_read, &bytes_written, &error);
46 
47   if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
48     return; /* silently skip if CP1255 is not supported, see bug 467707 */
49 
50   g_assert_no_error (error);
51   g_assert_cmpint (bytes_read, ==, 5);
52   g_assert_cmpint (bytes_written, ==, 10);
53   g_assert_cmpstr (out, ==, expected);
54   g_free (out);
55 }
56 
57 /* Some tests involving "vulgar fraction one half" (U+00BD). This is
58  * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
59  * represented in ISO-8859-15. */
60 static void
test_one_half(void)61 test_one_half (void)
62 {
63   const gchar *in_utf8 = "\xc2\xbd";
64   gchar *out;
65   gsize bytes_read = 0;
66   gsize bytes_written = 0;
67   GError *error = NULL;
68 
69   out = g_convert (in_utf8, -1,
70 		   "ISO-8859-1", "UTF-8",
71 		   &bytes_read, &bytes_written,
72 		   &error);
73 
74   g_assert_no_error (error);
75   g_assert_cmpint (bytes_read, ==, 2);
76   g_assert_cmpint (bytes_written, ==, 1);
77   g_assert_cmpstr (out, ==, "\xbd");
78   g_free (out);
79 
80   out = g_convert (in_utf8, -1,
81 		   "ISO-8859-15", "UTF-8",
82 		   &bytes_read, &bytes_written,
83 		   &error);
84 
85   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
86   g_assert_cmpint (bytes_read, ==, 0);
87   g_assert_cmpint (bytes_written, ==, 0);
88   g_assert_cmpstr (out, ==, NULL);
89   g_clear_error (&error);
90   g_free (out);
91 
92   out = g_convert_with_fallback (in_utf8, -1,
93 				 "ISO8859-15", "UTF-8",
94 				 "a",
95 				 &bytes_read, &bytes_written,
96 				 &error);
97 
98   g_assert_no_error (error);
99   g_assert_cmpint (bytes_read, ==, 2);
100   g_assert_cmpint (bytes_written, ==, 1);
101   g_assert_cmpstr (out, ==, "a");
102   g_free (out);
103 }
104 
105 static void
test_byte_order(void)106 test_byte_order (void)
107 {
108   gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
109   gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
110   const gchar *expected = "\xce\x93";
111   gchar *out;
112   gsize bytes_read = 0;
113   gsize bytes_written = 0;
114   GError *error = NULL;
115 
116   out = g_convert (in_be, sizeof (in_be),
117 		   "UTF-8", "UTF-16",
118 		   &bytes_read, &bytes_written,
119 		   &error);
120 
121   g_assert_no_error (error);
122   g_assert_cmpint (bytes_read, ==, 4);
123   g_assert_cmpint (bytes_written, ==, 2);
124   g_assert_cmpstr (out, ==, expected);
125   g_free (out);
126 
127   out = g_convert (in_le, sizeof (in_le),
128 		   "UTF-8", "UTF-16",
129 		   &bytes_read, &bytes_written,
130 		   &error);
131 
132   g_assert_no_error (error);
133   g_assert_cmpint (bytes_read, ==, 4);
134   g_assert_cmpint (bytes_written, ==, 2);
135   g_assert_cmpstr (out, ==, expected);
136   g_free (out);
137 }
138 
139 static void
check_utf8_to_ucs4(const char * utf8,gsize utf8_len,const gunichar * ucs4,glong ucs4_len,glong error_pos)140 check_utf8_to_ucs4 (const char     *utf8,
141 		    gsize           utf8_len,
142 		    const gunichar *ucs4,
143 		    glong           ucs4_len,
144 		    glong           error_pos)
145 {
146   gunichar *result, *result2, *result3;
147   glong items_read, items_read2;
148   glong items_written, items_written2;
149   GError *error, *error2, *error3;
150   gint i;
151 
152   if (!error_pos)
153     {
154       /* check the fast conversion */
155       result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
156 
157       g_assert_cmpint (items_written, ==, ucs4_len);
158       g_assert (result);
159       for (i = 0; i <= items_written; i++)
160 	g_assert (result[i] == ucs4[i]);
161 
162       g_free (result);
163     }
164 
165   error = NULL;
166   result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
167 
168   if (utf8_len == strlen (utf8))
169     {
170       /* check that len == -1 yields identical results */
171       error2 = NULL;
172       result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
173       g_assert (error || items_read2 == items_read);
174       g_assert (error || items_written2 == items_written);
175       g_assert_cmpint (!!result, ==, !!result2);
176       g_assert_cmpint (!!error, ==, !!error2);
177       if (result)
178 	for (i = 0; i <= items_written; i++)
179 	  g_assert (result[i] == result2[i]);
180 
181       g_free (result2);
182       if (error2)
183 	g_error_free (error2);
184     }
185 
186   error3 = NULL;
187   result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
188 
189   if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
190     {
191       g_assert_no_error (error);
192       g_assert_cmpint (items_read, ==, error_pos);
193       g_assert_cmpint (items_written, ==, ucs4_len);
194       g_assert (result);
195       for (i = 0; i <= items_written; i++)
196 	g_assert (result[i] == ucs4[i]);
197       g_error_free (error3);
198     }
199   else if (error_pos)
200     {
201       g_assert (error != NULL);
202       g_assert (result == NULL);
203       g_assert_cmpint (items_read, ==, error_pos);
204       g_error_free (error);
205 
206       g_assert (error3 != NULL);
207       g_assert (result3 == NULL);
208       g_error_free (error3);
209     }
210   else
211     {
212       g_assert_no_error (error);
213       g_assert_cmpint (items_read, ==, utf8_len);
214       g_assert_cmpint (items_written, ==, ucs4_len);
215       g_assert (result);
216       for (i = 0; i <= items_written; i++)
217 	g_assert (result[i] == ucs4[i]);
218 
219       g_assert_no_error (error3);
220       g_assert (result3);
221       for (i = 0; i <= ucs4_len; i++)
222 	g_assert (result3[i] == ucs4[i]);
223     }
224 
225   g_free (result);
226   g_free (result3);
227 }
228 
229 static void
check_ucs4_to_utf8(const gunichar * ucs4,glong ucs4_len,const char * utf8,glong utf8_len,glong error_pos)230 check_ucs4_to_utf8 (const gunichar *ucs4,
231 		    glong           ucs4_len,
232 		    const char     *utf8,
233 		    glong           utf8_len,
234 		    glong           error_pos)
235 {
236   gchar *result, *result2, *result3;
237   glong items_read, items_read2;
238   glong items_written, items_written2;
239   GError *error, *error2, *error3;
240 
241   error = NULL;
242   result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
243 
244   if (ucs4[ucs4_len] == 0)
245     {
246       /* check that len == -1 yields identical results */
247       error2 = NULL;
248       result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
249 
250       g_assert (error || items_read2 == items_read);
251       g_assert (error || items_written2 == items_written);
252       g_assert_cmpint (!!result, ==, !!result2);
253       g_assert_cmpint (!!error, ==, !!error2);
254       if (result)
255 	g_assert_cmpstr (result, ==, result2);
256 
257       g_free (result2);
258       if (error2)
259 	g_error_free (error2);
260     }
261 
262   error3 = NULL;
263   result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
264 
265   if (error_pos)
266     {
267       g_assert (error != NULL);
268       g_assert (result == NULL);
269       g_assert_cmpint (items_read, ==, error_pos);
270       g_error_free (error);
271 
272       g_assert (error3 != NULL);
273       g_assert (result3 == NULL);
274       g_error_free (error3);
275     }
276   else
277     {
278       g_assert_no_error (error);
279       g_assert_cmpint (items_read, ==, ucs4_len);
280       g_assert_cmpint (items_written, ==, utf8_len);
281       g_assert (result);
282       g_assert_cmpstr (result, ==, utf8);
283 
284       g_assert_no_error (error3);
285       g_assert (result3);
286       g_assert_cmpstr (result3, ==, utf8);
287     }
288 
289   g_free (result);
290   g_free (result3);
291 }
292 
293 static void
check_utf8_to_utf16(const char * utf8,gsize utf8_len,const gunichar2 * utf16,glong utf16_len,glong error_pos)294 check_utf8_to_utf16 (const char      *utf8,
295 		     gsize            utf8_len,
296 		     const gunichar2 *utf16,
297 		     glong            utf16_len,
298 		     glong            error_pos)
299 {
300   gunichar2 *result, *result2, *result3;
301   glong items_read, items_read2;
302   glong items_written, items_written2;
303   GError *error, *error2, *error3;
304   gint i;
305 
306   error = NULL;
307   result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
308 
309   if (utf8_len == strlen (utf8))
310     {
311       /* check that len == -1 yields identical results */
312       error2 = NULL;
313       result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
314       g_assert (error || items_read2 == items_read);
315       g_assert (error || items_written2 == items_written);
316       g_assert_cmpint (!!result, ==, !!result2);
317       g_assert_cmpint (!!error, ==, !!error2);
318       if (result)
319 	for (i = 0; i <= items_written; i++)
320 	  g_assert (result[i] == result2[i]);
321 
322       g_free (result2);
323       if (error2)
324 	g_error_free (error2);
325     }
326 
327   error3 = NULL;
328   result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
329 
330   if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
331     {
332       g_assert_no_error (error);
333       g_assert_cmpint (items_read, ==, error_pos);
334       g_assert_cmpint (items_written, ==, utf16_len);
335       g_assert (result);
336       for (i = 0; i <= items_written; i++)
337 	g_assert (result[i] == utf16[i]);
338       g_error_free (error3);
339     }
340   else if (error_pos)
341     {
342       g_assert (error != NULL);
343       g_assert (result == NULL);
344       g_assert_cmpint (items_read, ==, error_pos);
345       g_error_free (error);
346 
347       g_assert (error3 != NULL);
348       g_assert (result3 == NULL);
349       g_error_free (error3);
350     }
351   else
352     {
353       g_assert_no_error (error);
354       g_assert_cmpint (items_read, ==, utf8_len);
355       g_assert_cmpint (items_written, ==, utf16_len);
356       g_assert (result);
357       for (i = 0; i <= items_written; i++)
358 	g_assert (result[i] == utf16[i]);
359 
360       g_assert_no_error (error3);
361       g_assert (result3);
362       for (i = 0; i <= utf16_len; i++)
363 	g_assert (result3[i] == utf16[i]);
364     }
365 
366   g_free (result);
367   g_free (result3);
368 }
369 
370 static void
check_utf16_to_utf8(const gunichar2 * utf16,glong utf16_len,const char * utf8,glong utf8_len,glong error_pos)371 check_utf16_to_utf8 (const gunichar2 *utf16,
372 		     glong            utf16_len,
373 		     const char      *utf8,
374 		     glong            utf8_len,
375 		     glong            error_pos)
376 {
377   gchar *result, *result2, *result3;
378   glong items_read, items_read2;
379   glong items_written, items_written2;
380   GError *error, *error2, *error3;
381 
382   error = NULL;
383   result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
384   if (utf16[utf16_len] == 0)
385     {
386       /* check that len == -1 yields identical results */
387       error2 = NULL;
388       result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
389 
390       g_assert (error || items_read2 == items_read);
391       g_assert (error || items_written2 == items_written);
392       g_assert_cmpint (!!result, ==, !!result2);
393       g_assert_cmpint (!!error, ==, !!error2);
394       if (result)
395 	g_assert_cmpstr (result, ==, result2);
396 
397       g_free (result2);
398       if (error2)
399 	g_error_free (error2);
400     }
401 
402   error3 = NULL;
403   result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
404 
405   if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
406     {
407       g_assert_no_error (error);
408       g_assert_cmpint (items_read, ==, error_pos);
409       g_assert_cmpint (items_read + 1, ==, utf16_len);
410       g_assert_cmpint (items_written, ==, utf8_len);
411       g_assert (result);
412       g_assert_cmpstr (result, ==, utf8);
413       g_error_free (error3);
414     }
415   else if (error_pos)
416     {
417       g_assert (error != NULL);
418       g_assert (result == NULL);
419       g_assert_cmpint (items_read, ==, error_pos);
420       g_error_free (error);
421 
422       g_assert (error3 != NULL);
423       g_assert (result3 == NULL);
424       g_error_free (error3);
425     }
426   else
427     {
428       g_assert_no_error (error);
429       g_assert_cmpint (items_read, ==, utf16_len);
430       g_assert_cmpint (items_written, ==, utf8_len);
431       g_assert (result);
432       g_assert_cmpstr (result, ==, utf8);
433 
434       g_assert_no_error (error3);
435       g_assert (result3);
436       g_assert_cmpstr (result3, ==, utf8);
437     }
438 
439   g_free (result);
440   g_free (result3);
441 }
442 
443 static void
check_ucs4_to_utf16(const gunichar * ucs4,glong ucs4_len,const gunichar2 * utf16,glong utf16_len,glong error_pos)444 check_ucs4_to_utf16 (const gunichar  *ucs4,
445 		     glong            ucs4_len,
446 		     const gunichar2 *utf16,
447 		     glong            utf16_len,
448 		     glong            error_pos)
449 {
450   gunichar2 *result, *result2, *result3;
451   glong items_read, items_read2;
452   glong items_written, items_written2;
453   GError *error, *error2, *error3;
454   gint i;
455 
456   error = NULL;
457   result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
458 
459   if (ucs4[ucs4_len] == 0)
460     {
461       /* check that len == -1 yields identical results */
462       error2 = NULL;
463       result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
464 
465       g_assert (error || items_read2 == items_read);
466       g_assert (error || items_written2 == items_written);
467       g_assert_cmpint (!!result, ==, !!result2);
468       g_assert_cmpint (!!error, ==, !!error2);
469       if (result)
470       for (i = 0; i <= utf16_len; i++)
471 	g_assert (result[i] == result2[i]);
472 
473       g_free (result2);
474       if (error2)
475 	g_error_free (error2);
476     }
477 
478   error3 = NULL;
479   result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
480 
481   if (error_pos)
482     {
483       g_assert (error != NULL);
484       g_assert (result == NULL);
485       g_assert_cmpint (items_read, ==, error_pos);
486       g_error_free (error);
487 
488       g_assert (error3 != NULL);
489       g_assert (result3 == NULL);
490       g_error_free (error3);
491     }
492   else
493     {
494       g_assert_no_error (error);
495       g_assert_cmpint (items_read, ==, ucs4_len);
496       g_assert_cmpint (items_written, ==, utf16_len);
497       g_assert (result);
498       for (i = 0; i <= utf16_len; i++)
499 	g_assert (result[i] == utf16[i]);
500 
501       g_assert_no_error (error3);
502       g_assert (result3);
503       for (i = 0; i <= utf16_len; i++)
504 	g_assert (result3[i] == utf16[i]);
505     }
506 
507   g_free (result);
508   g_free (result3);
509 }
510 
511 static void
check_utf16_to_ucs4(const gunichar2 * utf16,glong utf16_len,const gunichar * ucs4,glong ucs4_len,glong error_pos)512 check_utf16_to_ucs4 (const gunichar2 *utf16,
513 		     glong            utf16_len,
514 		     const gunichar  *ucs4,
515 		     glong            ucs4_len,
516 		     glong            error_pos)
517 {
518   gunichar *result, *result2, *result3;
519   glong items_read, items_read2;
520   glong items_written, items_written2;
521   GError *error, *error2, *error3;
522   gint i;
523 
524   error = NULL;
525   result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
526   if (utf16[utf16_len] == 0)
527     {
528       /* check that len == -1 yields identical results */
529       error2 = NULL;
530       result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
531       g_assert (error || items_read2 == items_read);
532       g_assert (error || items_written2 == items_written);
533       g_assert_cmpint (!!result, ==, !!result2);
534       g_assert_cmpint (!!error, ==, !!error2);
535       if (result)
536 	for (i = 0; i <= items_written; i++)
537 	  g_assert (result[i] == result2[i]);
538 
539       g_free (result2);
540       if (error2)
541 	g_error_free (error2);
542     }
543 
544   error3 = NULL;
545   result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
546 
547   if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
548     {
549       g_assert_no_error (error);
550       g_assert_cmpint (items_read, ==, error_pos);
551       g_assert_cmpint (items_read + 1, ==, utf16_len);
552       g_assert_cmpint (items_written, ==, ucs4_len);
553       g_assert (result);
554       for (i = 0; i <= items_written; i++)
555 	g_assert (result[i] == ucs4[i]);
556       g_error_free (error3);
557     }
558   else if (error_pos)
559     {
560       g_assert (error != NULL);
561       g_assert (result == NULL);
562       g_assert_cmpint (items_read, ==, error_pos);
563       g_error_free (error);
564 
565       g_assert (error3 != NULL);
566       g_assert (result3 == NULL);
567       g_error_free (error3);
568     }
569   else
570     {
571       g_assert_no_error (error);
572       g_assert_cmpint (items_read, ==, utf16_len);
573       g_assert_cmpint (items_written, ==, ucs4_len);
574       g_assert (result);
575       for (i = 0; i <= ucs4_len; i++)
576 	g_assert (result[i] == ucs4[i]);
577 
578       g_assert_no_error (error3);
579       g_assert (result3);
580       for (i = 0; i <= ucs4_len; i++)
581 	g_assert (result3[i] == ucs4[i]);
582     }
583 
584   g_free (result);
585   g_free (result3);
586 }
587 
588 static void
test_unicode_conversions(void)589 test_unicode_conversions (void)
590 {
591   const char *utf8;
592   gunichar ucs4[100];
593   gunichar2 utf16[100];
594 
595   utf8 = "abc";
596   ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
597   utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
598 
599   check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
600   check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
601   check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
602   check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
603   check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
604   check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
605 
606   utf8 = "\316\261\316\262\316\263";
607   ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
608   utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
609 
610   check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
611   check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
612   check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
613   check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
614   check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
615   check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
616 
617   /* partial utf8 character */
618   utf8 = "abc\316";
619   ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
620   utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
621 
622   check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
623   check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
624 
625   /* invalid utf8 */
626   utf8 = "abc\316\316";
627   ucs4[0] = 0;
628   utf16[0] = 0;
629 
630   check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
631   check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
632 
633   /* partial utf16 character */
634   utf8 = "ab";
635   ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
636   utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
637 
638   check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
639   check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
640 
641   /* invalid utf16 */
642   utf8 = NULL;
643   ucs4[0] = 0;
644   utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
645 
646   check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
647   check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
648 
649   /* invalid ucs4 */
650   utf8 = NULL;
651   ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
652   utf16[0] = 0;
653 
654   check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
655   check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
656 }
657 
658 static void
test_filename_utf8(void)659 test_filename_utf8 (void)
660 {
661   const gchar *filename = "/my/path/to/foo";
662   gchar *utf8;
663   gchar *back;
664   GError *error;
665 
666   error = NULL;
667   utf8 = g_filename_to_utf8 (filename, -1, NULL, NULL, &error);
668   g_assert_no_error (error);
669   back = g_filename_from_utf8 (utf8, -1, NULL, NULL, &error);
670   g_assert_no_error (error);
671   g_assert_cmpstr (back, ==, filename);
672 
673   g_free (utf8);
674   g_free (back);
675 }
676 
677 static void
test_filename_display(void)678 test_filename_display (void)
679 {
680   const gchar *filename = "/my/path/to/foo";
681   char *display;
682 
683   display = g_filename_display_basename (filename);
684   g_assert_cmpstr (display, ==, "foo");
685 
686   g_free (display);
687 }
688 
689 /* g_convert() should accept and produce text buffers with embedded
690  * nul bytes/characters.
691  */
692 static void
test_convert_embedded_nul(void)693 test_convert_embedded_nul (void)
694 {
695   gchar *res;
696   gsize bytes_read, bytes_written;
697   GError *error = NULL;
698 
699   res = g_convert ("ab\0\xf6", 4, "UTF-8", "ISO-8859-1",
700                    &bytes_read, &bytes_written, &error);
701   g_assert_no_error (error);
702   g_assert_cmpuint (bytes_read, ==, 4);
703   g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", 5);
704   g_free (res);
705 }
706 
707 static void
test_locale_to_utf8_embedded_nul(void)708 test_locale_to_utf8_embedded_nul (void)
709 {
710   g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
711   g_test_trap_assert_passed ();
712   g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
713   g_test_trap_assert_passed ();
714 }
715 
716 /* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()
717  * result in an error.
718  */
719 static void
test_locale_to_utf8_embedded_nul_utf8(void)720 test_locale_to_utf8_embedded_nul_utf8 (void)
721 {
722   gchar *res;
723   gsize bytes_read;
724   GError *error = NULL;
725 
726   setlocale (LC_ALL, "");
727   g_setenv ("CHARSET", "UTF-8", TRUE);
728   g_assert_true (g_get_charset (NULL));
729 
730   res = g_locale_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
731 
732   g_assert_null (res);
733   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
734   g_assert_cmpuint (bytes_read, ==, 2);
735   g_error_free (error);
736 }
737 
738 /* Test that embedded nul characters in output of g_locale_to_utf8(),
739  * when converted from non-UTF8 input, result in an error.
740  */
741 static void
test_locale_to_utf8_embedded_nul_iconv(void)742 test_locale_to_utf8_embedded_nul_iconv (void)
743 {
744   gchar *res;
745   GError *error = NULL;
746 
747   setlocale (LC_ALL, "C");
748   g_setenv ("CHARSET", "US-ASCII", TRUE);
749   g_assert_false (g_get_charset (NULL));
750 
751   res = g_locale_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
752 
753   g_assert_null (res);
754   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
755   g_error_free (error);
756 }
757 
758 static void
test_locale_from_utf8_embedded_nul(void)759 test_locale_from_utf8_embedded_nul (void)
760 {
761   g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
762   g_test_trap_assert_passed ();
763   g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
764   g_test_trap_assert_passed ();
765 }
766 
767 /* Test that embedded nul characters in input to g_locale_from_utf8(),
768  * when converting (copying) to UTF-8 output, result in an error.
769  */
770 static void
test_locale_from_utf8_embedded_nul_utf8(void)771 test_locale_from_utf8_embedded_nul_utf8 (void)
772 {
773   gchar *res;
774   gsize bytes_read;
775   GError *error = NULL;
776 
777   setlocale (LC_ALL, "");
778   g_setenv ("CHARSET", "UTF-8", TRUE);
779   g_assert_true (g_get_charset (NULL));
780 
781   res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
782 
783   g_assert_null (res);
784   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
785   g_assert_cmpuint (bytes_read, ==, 2);
786   g_error_free (error);
787 }
788 
789 /* Test that embedded nul characters in input to g_locale_from_utf8(),
790  * when converting to non-UTF-8 output, result in an error.
791  */
792 static void
test_locale_from_utf8_embedded_nul_iconv(void)793 test_locale_from_utf8_embedded_nul_iconv (void)
794 {
795   gchar *res;
796   gsize bytes_read;
797   GError *error = NULL;
798 
799   setlocale (LC_ALL, "C");
800   g_setenv ("CHARSET", "US-ASCII", TRUE);
801   g_assert_false (g_get_charset (NULL));
802 
803   res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
804 
805   g_assert_null (res);
806   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
807   g_assert_cmpuint (bytes_read, ==, 2);
808   g_error_free (error);
809 }
810 
811 static void
test_filename_to_utf8_embedded_nul(void)812 test_filename_to_utf8_embedded_nul (void)
813 {
814   g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
815   g_test_trap_assert_passed ();
816   g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
817   g_test_trap_assert_passed ();
818 }
819 
820 /* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()
821  * result in an error.
822  */
823 static void
test_filename_to_utf8_embedded_nul_utf8(void)824 test_filename_to_utf8_embedded_nul_utf8 (void)
825 {
826   gchar *res;
827   gsize bytes_read;
828   GError *error = NULL;
829 
830 #ifndef G_OS_WIN32
831   /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
832   g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
833   g_assert_true (g_get_filename_charsets (NULL));
834 #endif
835 
836   res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
837 
838   g_assert_null (res);
839   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
840   g_assert_cmpuint (bytes_read, ==, 2);
841   g_error_free (error);
842 }
843 
844 /* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
845  * result in an error.
846  */
847 static void
test_filename_to_utf8_embedded_nul_iconv(void)848 test_filename_to_utf8_embedded_nul_iconv (void)
849 {
850   gchar *res;
851   gsize bytes_read;
852   GError *error = NULL;
853 
854 #ifndef G_OS_WIN32
855   /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
856   g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
857   g_assert_false (g_get_filename_charsets (NULL));
858 #endif
859 
860   res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
861 
862   g_assert_null (res);
863   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
864   g_assert_cmpuint (bytes_read, ==, 2);
865   g_error_free (error);
866 }
867 
868 static void
test_filename_from_utf8_embedded_nul(void)869 test_filename_from_utf8_embedded_nul (void)
870 {
871   g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
872   g_test_trap_assert_passed ();
873   g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
874   g_test_trap_assert_passed ();
875 }
876 
877 /* Test that embedded nul characters in input to g_filename_from_utf8(),
878  * when converting (copying) to UTF-8 output, result in an error.
879  */
880 static void
test_filename_from_utf8_embedded_nul_utf8(void)881 test_filename_from_utf8_embedded_nul_utf8 (void)
882 {
883   gchar *res;
884   gsize bytes_read;
885   GError *error = NULL;
886 
887 #ifndef G_OS_WIN32
888   /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
889   g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
890   g_assert_true (g_get_filename_charsets (NULL));
891 #endif
892 
893   res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
894 
895   g_assert_null (res);
896   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
897   g_assert_cmpuint (bytes_read, ==, 2);
898   g_error_free (error);
899 }
900 
901 /* Test that embedded nul characters in input to g_filename_from_utf8(),
902  * when converting to non-UTF-8 output, result in an error.
903  */
904 static void
test_filename_from_utf8_embedded_nul_iconv(void)905 test_filename_from_utf8_embedded_nul_iconv (void)
906 {
907   gchar *res;
908   gsize bytes_read;
909   GError *error = NULL;
910 
911 #ifndef G_OS_WIN32
912   /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
913   g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
914   g_assert_false (g_get_filename_charsets (NULL));
915 #endif
916 
917   res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
918 
919   g_assert_null (res);
920   g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
921   g_assert_cmpuint (bytes_read, ==, 2);
922   g_error_free (error);
923 }
924 
925 static void
test_no_conv(void)926 test_no_conv (void)
927 {
928   const gchar *in = "";
929   gchar *out G_GNUC_UNUSED;
930   gsize bytes_read = 0;
931   gsize bytes_written = 0;
932   GError *error = NULL;
933 
934   out = g_convert (in, -1, "XXX", "UVZ",
935                    &bytes_read, &bytes_written, &error);
936 
937   /* error code is unreliable, since we mishandle errno there */
938   g_assert (error && error->domain == G_CONVERT_ERROR);
939   g_error_free (error);
940 }
941 
942 int
main(int argc,char * argv[])943 main (int argc, char *argv[])
944 {
945   g_test_init (&argc, &argv, NULL);
946 
947   g_test_add_func ("/conversion/no-conv", test_no_conv);
948   g_test_add_func ("/conversion/iconv-state", test_iconv_state);
949   g_test_add_func ("/conversion/illegal-sequence", test_one_half);
950   g_test_add_func ("/conversion/byte-order", test_byte_order);
951   g_test_add_func ("/conversion/unicode", test_unicode_conversions);
952   g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
953   g_test_add_func ("/conversion/filename-display", test_filename_display);
954   g_test_add_func ("/conversion/convert-embedded-nul", test_convert_embedded_nul);
955   g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
956   g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
957   g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
958   g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
959   g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
960   g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
961   g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
962   g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
963   g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
964   g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
965   g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
966   g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
967 
968   return g_test_run ();
969 }
970