1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 */
24
25 #undef G_DISABLE_ASSERT
26 #undef G_LOG_DOMAIN
27
28 #include <locale.h>
29 #include <string.h>
30
31 #include <glib.h>
32
33 /* Bug 311337 */
34 static void
test_iconv_state(void)35 test_iconv_state (void)
36 {
37 const gchar *in = "\xf4\xe5\xf8\xe5\xed";
38 const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
39 gchar *out;
40 gsize bytes_read = 0;
41 gsize bytes_written = 0;
42 GError *error = NULL;
43
44 out = g_convert (in, -1, "UTF-8", "CP1255",
45 &bytes_read, &bytes_written, &error);
46
47 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
48 return; /* silently skip if CP1255 is not supported, see bug 467707 */
49
50 g_assert_no_error (error);
51 g_assert_cmpint (bytes_read, ==, 5);
52 g_assert_cmpint (bytes_written, ==, 10);
53 g_assert_cmpstr (out, ==, expected);
54 g_free (out);
55 }
56
57 /* Some tests involving "vulgar fraction one half" (U+00BD). This is
58 * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
59 * represented in ISO-8859-15. */
60 static void
test_one_half(void)61 test_one_half (void)
62 {
63 const gchar *in_utf8 = "\xc2\xbd";
64 gchar *out;
65 gsize bytes_read = 0;
66 gsize bytes_written = 0;
67 GError *error = NULL;
68
69 out = g_convert (in_utf8, -1,
70 "ISO-8859-1", "UTF-8",
71 &bytes_read, &bytes_written,
72 &error);
73
74 g_assert_no_error (error);
75 g_assert_cmpint (bytes_read, ==, 2);
76 g_assert_cmpint (bytes_written, ==, 1);
77 g_assert_cmpstr (out, ==, "\xbd");
78 g_free (out);
79
80 out = g_convert (in_utf8, -1,
81 "ISO-8859-15", "UTF-8",
82 &bytes_read, &bytes_written,
83 &error);
84
85 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
86 g_assert_cmpint (bytes_read, ==, 0);
87 g_assert_cmpint (bytes_written, ==, 0);
88 g_assert_cmpstr (out, ==, NULL);
89 g_clear_error (&error);
90 g_free (out);
91
92 out = g_convert_with_fallback (in_utf8, -1,
93 "ISO8859-15", "UTF-8",
94 "a",
95 &bytes_read, &bytes_written,
96 &error);
97
98 g_assert_no_error (error);
99 g_assert_cmpint (bytes_read, ==, 2);
100 g_assert_cmpint (bytes_written, ==, 1);
101 g_assert_cmpstr (out, ==, "a");
102 g_free (out);
103 }
104
105 static void
test_byte_order(void)106 test_byte_order (void)
107 {
108 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
109 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
110 const gchar *expected = "\xce\x93";
111 gchar *out;
112 gsize bytes_read = 0;
113 gsize bytes_written = 0;
114 GError *error = NULL;
115
116 out = g_convert (in_be, sizeof (in_be),
117 "UTF-8", "UTF-16",
118 &bytes_read, &bytes_written,
119 &error);
120
121 g_assert_no_error (error);
122 g_assert_cmpint (bytes_read, ==, 4);
123 g_assert_cmpint (bytes_written, ==, 2);
124 g_assert_cmpstr (out, ==, expected);
125 g_free (out);
126
127 out = g_convert (in_le, sizeof (in_le),
128 "UTF-8", "UTF-16",
129 &bytes_read, &bytes_written,
130 &error);
131
132 g_assert_no_error (error);
133 g_assert_cmpint (bytes_read, ==, 4);
134 g_assert_cmpint (bytes_written, ==, 2);
135 g_assert_cmpstr (out, ==, expected);
136 g_free (out);
137 }
138
139 static void
check_utf8_to_ucs4(const char * utf8,gsize utf8_len,const gunichar * ucs4,glong ucs4_len,glong error_pos)140 check_utf8_to_ucs4 (const char *utf8,
141 gsize utf8_len,
142 const gunichar *ucs4,
143 glong ucs4_len,
144 glong error_pos)
145 {
146 gunichar *result, *result2, *result3;
147 glong items_read, items_read2;
148 glong items_written, items_written2;
149 GError *error, *error2, *error3;
150 gint i;
151
152 if (!error_pos)
153 {
154 /* check the fast conversion */
155 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
156
157 g_assert_cmpint (items_written, ==, ucs4_len);
158 g_assert (result);
159 for (i = 0; i <= items_written; i++)
160 g_assert (result[i] == ucs4[i]);
161
162 g_free (result);
163 }
164
165 error = NULL;
166 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
167
168 if (utf8_len == strlen (utf8))
169 {
170 /* check that len == -1 yields identical results */
171 error2 = NULL;
172 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
173 g_assert (error || items_read2 == items_read);
174 g_assert (error || items_written2 == items_written);
175 g_assert_cmpint (!!result, ==, !!result2);
176 g_assert_cmpint (!!error, ==, !!error2);
177 if (result)
178 for (i = 0; i <= items_written; i++)
179 g_assert (result[i] == result2[i]);
180
181 g_free (result2);
182 if (error2)
183 g_error_free (error2);
184 }
185
186 error3 = NULL;
187 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
188
189 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
190 {
191 g_assert_no_error (error);
192 g_assert_cmpint (items_read, ==, error_pos);
193 g_assert_cmpint (items_written, ==, ucs4_len);
194 g_assert (result);
195 for (i = 0; i <= items_written; i++)
196 g_assert (result[i] == ucs4[i]);
197 g_error_free (error3);
198 }
199 else if (error_pos)
200 {
201 g_assert (error != NULL);
202 g_assert (result == NULL);
203 g_assert_cmpint (items_read, ==, error_pos);
204 g_error_free (error);
205
206 g_assert (error3 != NULL);
207 g_assert (result3 == NULL);
208 g_error_free (error3);
209 }
210 else
211 {
212 g_assert_no_error (error);
213 g_assert_cmpint (items_read, ==, utf8_len);
214 g_assert_cmpint (items_written, ==, ucs4_len);
215 g_assert (result);
216 for (i = 0; i <= items_written; i++)
217 g_assert (result[i] == ucs4[i]);
218
219 g_assert_no_error (error3);
220 g_assert (result3);
221 for (i = 0; i <= ucs4_len; i++)
222 g_assert (result3[i] == ucs4[i]);
223 }
224
225 g_free (result);
226 g_free (result3);
227 }
228
229 static void
check_ucs4_to_utf8(const gunichar * ucs4,glong ucs4_len,const char * utf8,glong utf8_len,glong error_pos)230 check_ucs4_to_utf8 (const gunichar *ucs4,
231 glong ucs4_len,
232 const char *utf8,
233 glong utf8_len,
234 glong error_pos)
235 {
236 gchar *result, *result2, *result3;
237 glong items_read, items_read2;
238 glong items_written, items_written2;
239 GError *error, *error2, *error3;
240
241 error = NULL;
242 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
243
244 if (ucs4[ucs4_len] == 0)
245 {
246 /* check that len == -1 yields identical results */
247 error2 = NULL;
248 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
249
250 g_assert (error || items_read2 == items_read);
251 g_assert (error || items_written2 == items_written);
252 g_assert_cmpint (!!result, ==, !!result2);
253 g_assert_cmpint (!!error, ==, !!error2);
254 if (result)
255 g_assert_cmpstr (result, ==, result2);
256
257 g_free (result2);
258 if (error2)
259 g_error_free (error2);
260 }
261
262 error3 = NULL;
263 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
264
265 if (error_pos)
266 {
267 g_assert (error != NULL);
268 g_assert (result == NULL);
269 g_assert_cmpint (items_read, ==, error_pos);
270 g_error_free (error);
271
272 g_assert (error3 != NULL);
273 g_assert (result3 == NULL);
274 g_error_free (error3);
275 }
276 else
277 {
278 g_assert_no_error (error);
279 g_assert_cmpint (items_read, ==, ucs4_len);
280 g_assert_cmpint (items_written, ==, utf8_len);
281 g_assert (result);
282 g_assert_cmpstr (result, ==, utf8);
283
284 g_assert_no_error (error3);
285 g_assert (result3);
286 g_assert_cmpstr (result3, ==, utf8);
287 }
288
289 g_free (result);
290 g_free (result3);
291 }
292
293 static void
check_utf8_to_utf16(const char * utf8,gsize utf8_len,const gunichar2 * utf16,glong utf16_len,glong error_pos)294 check_utf8_to_utf16 (const char *utf8,
295 gsize utf8_len,
296 const gunichar2 *utf16,
297 glong utf16_len,
298 glong error_pos)
299 {
300 gunichar2 *result, *result2, *result3;
301 glong items_read, items_read2;
302 glong items_written, items_written2;
303 GError *error, *error2, *error3;
304 gint i;
305
306 error = NULL;
307 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
308
309 if (utf8_len == strlen (utf8))
310 {
311 /* check that len == -1 yields identical results */
312 error2 = NULL;
313 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
314 g_assert (error || items_read2 == items_read);
315 g_assert (error || items_written2 == items_written);
316 g_assert_cmpint (!!result, ==, !!result2);
317 g_assert_cmpint (!!error, ==, !!error2);
318 if (result)
319 for (i = 0; i <= items_written; i++)
320 g_assert (result[i] == result2[i]);
321
322 g_free (result2);
323 if (error2)
324 g_error_free (error2);
325 }
326
327 error3 = NULL;
328 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
329
330 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
331 {
332 g_assert_no_error (error);
333 g_assert_cmpint (items_read, ==, error_pos);
334 g_assert_cmpint (items_written, ==, utf16_len);
335 g_assert (result);
336 for (i = 0; i <= items_written; i++)
337 g_assert (result[i] == utf16[i]);
338 g_error_free (error3);
339 }
340 else if (error_pos)
341 {
342 g_assert (error != NULL);
343 g_assert (result == NULL);
344 g_assert_cmpint (items_read, ==, error_pos);
345 g_error_free (error);
346
347 g_assert (error3 != NULL);
348 g_assert (result3 == NULL);
349 g_error_free (error3);
350 }
351 else
352 {
353 g_assert_no_error (error);
354 g_assert_cmpint (items_read, ==, utf8_len);
355 g_assert_cmpint (items_written, ==, utf16_len);
356 g_assert (result);
357 for (i = 0; i <= items_written; i++)
358 g_assert (result[i] == utf16[i]);
359
360 g_assert_no_error (error3);
361 g_assert (result3);
362 for (i = 0; i <= utf16_len; i++)
363 g_assert (result3[i] == utf16[i]);
364 }
365
366 g_free (result);
367 g_free (result3);
368 }
369
370 static void
check_utf16_to_utf8(const gunichar2 * utf16,glong utf16_len,const char * utf8,glong utf8_len,glong error_pos)371 check_utf16_to_utf8 (const gunichar2 *utf16,
372 glong utf16_len,
373 const char *utf8,
374 glong utf8_len,
375 glong error_pos)
376 {
377 gchar *result, *result2, *result3;
378 glong items_read, items_read2;
379 glong items_written, items_written2;
380 GError *error, *error2, *error3;
381
382 error = NULL;
383 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
384 if (utf16[utf16_len] == 0)
385 {
386 /* check that len == -1 yields identical results */
387 error2 = NULL;
388 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
389
390 g_assert (error || items_read2 == items_read);
391 g_assert (error || items_written2 == items_written);
392 g_assert_cmpint (!!result, ==, !!result2);
393 g_assert_cmpint (!!error, ==, !!error2);
394 if (result)
395 g_assert_cmpstr (result, ==, result2);
396
397 g_free (result2);
398 if (error2)
399 g_error_free (error2);
400 }
401
402 error3 = NULL;
403 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
404
405 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
406 {
407 g_assert_no_error (error);
408 g_assert_cmpint (items_read, ==, error_pos);
409 g_assert_cmpint (items_read + 1, ==, utf16_len);
410 g_assert_cmpint (items_written, ==, utf8_len);
411 g_assert (result);
412 g_assert_cmpstr (result, ==, utf8);
413 g_error_free (error3);
414 }
415 else if (error_pos)
416 {
417 g_assert (error != NULL);
418 g_assert (result == NULL);
419 g_assert_cmpint (items_read, ==, error_pos);
420 g_error_free (error);
421
422 g_assert (error3 != NULL);
423 g_assert (result3 == NULL);
424 g_error_free (error3);
425 }
426 else
427 {
428 g_assert_no_error (error);
429 g_assert_cmpint (items_read, ==, utf16_len);
430 g_assert_cmpint (items_written, ==, utf8_len);
431 g_assert (result);
432 g_assert_cmpstr (result, ==, utf8);
433
434 g_assert_no_error (error3);
435 g_assert (result3);
436 g_assert_cmpstr (result3, ==, utf8);
437 }
438
439 g_free (result);
440 g_free (result3);
441 }
442
443 static void
check_ucs4_to_utf16(const gunichar * ucs4,glong ucs4_len,const gunichar2 * utf16,glong utf16_len,glong error_pos)444 check_ucs4_to_utf16 (const gunichar *ucs4,
445 glong ucs4_len,
446 const gunichar2 *utf16,
447 glong utf16_len,
448 glong error_pos)
449 {
450 gunichar2 *result, *result2, *result3;
451 glong items_read, items_read2;
452 glong items_written, items_written2;
453 GError *error, *error2, *error3;
454 gint i;
455
456 error = NULL;
457 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
458
459 if (ucs4[ucs4_len] == 0)
460 {
461 /* check that len == -1 yields identical results */
462 error2 = NULL;
463 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
464
465 g_assert (error || items_read2 == items_read);
466 g_assert (error || items_written2 == items_written);
467 g_assert_cmpint (!!result, ==, !!result2);
468 g_assert_cmpint (!!error, ==, !!error2);
469 if (result)
470 for (i = 0; i <= utf16_len; i++)
471 g_assert (result[i] == result2[i]);
472
473 g_free (result2);
474 if (error2)
475 g_error_free (error2);
476 }
477
478 error3 = NULL;
479 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
480
481 if (error_pos)
482 {
483 g_assert (error != NULL);
484 g_assert (result == NULL);
485 g_assert_cmpint (items_read, ==, error_pos);
486 g_error_free (error);
487
488 g_assert (error3 != NULL);
489 g_assert (result3 == NULL);
490 g_error_free (error3);
491 }
492 else
493 {
494 g_assert_no_error (error);
495 g_assert_cmpint (items_read, ==, ucs4_len);
496 g_assert_cmpint (items_written, ==, utf16_len);
497 g_assert (result);
498 for (i = 0; i <= utf16_len; i++)
499 g_assert (result[i] == utf16[i]);
500
501 g_assert_no_error (error3);
502 g_assert (result3);
503 for (i = 0; i <= utf16_len; i++)
504 g_assert (result3[i] == utf16[i]);
505 }
506
507 g_free (result);
508 g_free (result3);
509 }
510
511 static void
check_utf16_to_ucs4(const gunichar2 * utf16,glong utf16_len,const gunichar * ucs4,glong ucs4_len,glong error_pos)512 check_utf16_to_ucs4 (const gunichar2 *utf16,
513 glong utf16_len,
514 const gunichar *ucs4,
515 glong ucs4_len,
516 glong error_pos)
517 {
518 gunichar *result, *result2, *result3;
519 glong items_read, items_read2;
520 glong items_written, items_written2;
521 GError *error, *error2, *error3;
522 gint i;
523
524 error = NULL;
525 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
526 if (utf16[utf16_len] == 0)
527 {
528 /* check that len == -1 yields identical results */
529 error2 = NULL;
530 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
531 g_assert (error || items_read2 == items_read);
532 g_assert (error || items_written2 == items_written);
533 g_assert_cmpint (!!result, ==, !!result2);
534 g_assert_cmpint (!!error, ==, !!error2);
535 if (result)
536 for (i = 0; i <= items_written; i++)
537 g_assert (result[i] == result2[i]);
538
539 g_free (result2);
540 if (error2)
541 g_error_free (error2);
542 }
543
544 error3 = NULL;
545 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
546
547 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
548 {
549 g_assert_no_error (error);
550 g_assert_cmpint (items_read, ==, error_pos);
551 g_assert_cmpint (items_read + 1, ==, utf16_len);
552 g_assert_cmpint (items_written, ==, ucs4_len);
553 g_assert (result);
554 for (i = 0; i <= items_written; i++)
555 g_assert (result[i] == ucs4[i]);
556 g_error_free (error3);
557 }
558 else if (error_pos)
559 {
560 g_assert (error != NULL);
561 g_assert (result == NULL);
562 g_assert_cmpint (items_read, ==, error_pos);
563 g_error_free (error);
564
565 g_assert (error3 != NULL);
566 g_assert (result3 == NULL);
567 g_error_free (error3);
568 }
569 else
570 {
571 g_assert_no_error (error);
572 g_assert_cmpint (items_read, ==, utf16_len);
573 g_assert_cmpint (items_written, ==, ucs4_len);
574 g_assert (result);
575 for (i = 0; i <= ucs4_len; i++)
576 g_assert (result[i] == ucs4[i]);
577
578 g_assert_no_error (error3);
579 g_assert (result3);
580 for (i = 0; i <= ucs4_len; i++)
581 g_assert (result3[i] == ucs4[i]);
582 }
583
584 g_free (result);
585 g_free (result3);
586 }
587
588 static void
test_unicode_conversions(void)589 test_unicode_conversions (void)
590 {
591 const char *utf8;
592 gunichar ucs4[100];
593 gunichar2 utf16[100];
594
595 utf8 = "abc";
596 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
597 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
598
599 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
600 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
601 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
602 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
603 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
604 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
605
606 utf8 = "\316\261\316\262\316\263";
607 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
608 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
609
610 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
611 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
612 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
613 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
614 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
615 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
616
617 /* partial utf8 character */
618 utf8 = "abc\316";
619 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
620 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
621
622 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
623 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
624
625 /* invalid utf8 */
626 utf8 = "abc\316\316";
627 ucs4[0] = 0;
628 utf16[0] = 0;
629
630 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
631 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
632
633 /* partial utf16 character */
634 utf8 = "ab";
635 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
636 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
637
638 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
639 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
640
641 /* invalid utf16 */
642 utf8 = NULL;
643 ucs4[0] = 0;
644 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
645
646 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
647 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
648
649 /* invalid ucs4 */
650 utf8 = NULL;
651 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
652 utf16[0] = 0;
653
654 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
655 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
656 }
657
658 static void
test_filename_utf8(void)659 test_filename_utf8 (void)
660 {
661 const gchar *filename = "/my/path/to/foo";
662 gchar *utf8;
663 gchar *back;
664 GError *error;
665
666 error = NULL;
667 utf8 = g_filename_to_utf8 (filename, -1, NULL, NULL, &error);
668 g_assert_no_error (error);
669 back = g_filename_from_utf8 (utf8, -1, NULL, NULL, &error);
670 g_assert_no_error (error);
671 g_assert_cmpstr (back, ==, filename);
672
673 g_free (utf8);
674 g_free (back);
675 }
676
677 static void
test_filename_display(void)678 test_filename_display (void)
679 {
680 const gchar *filename = "/my/path/to/foo";
681 char *display;
682
683 display = g_filename_display_basename (filename);
684 g_assert_cmpstr (display, ==, "foo");
685
686 g_free (display);
687 }
688
689 /* g_convert() should accept and produce text buffers with embedded
690 * nul bytes/characters.
691 */
692 static void
test_convert_embedded_nul(void)693 test_convert_embedded_nul (void)
694 {
695 gchar *res;
696 gsize bytes_read, bytes_written;
697 GError *error = NULL;
698
699 res = g_convert ("ab\0\xf6", 4, "UTF-8", "ISO-8859-1",
700 &bytes_read, &bytes_written, &error);
701 g_assert_no_error (error);
702 g_assert_cmpuint (bytes_read, ==, 4);
703 g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", 5);
704 g_free (res);
705 }
706
707 static void
test_locale_to_utf8_embedded_nul(void)708 test_locale_to_utf8_embedded_nul (void)
709 {
710 g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
711 g_test_trap_assert_passed ();
712 g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
713 g_test_trap_assert_passed ();
714 }
715
716 /* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()
717 * result in an error.
718 */
719 static void
test_locale_to_utf8_embedded_nul_utf8(void)720 test_locale_to_utf8_embedded_nul_utf8 (void)
721 {
722 gchar *res;
723 gsize bytes_read;
724 GError *error = NULL;
725
726 setlocale (LC_ALL, "");
727 g_setenv ("CHARSET", "UTF-8", TRUE);
728 g_assert_true (g_get_charset (NULL));
729
730 res = g_locale_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
731
732 g_assert_null (res);
733 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
734 g_assert_cmpuint (bytes_read, ==, 2);
735 g_error_free (error);
736 }
737
738 /* Test that embedded nul characters in output of g_locale_to_utf8(),
739 * when converted from non-UTF8 input, result in an error.
740 */
741 static void
test_locale_to_utf8_embedded_nul_iconv(void)742 test_locale_to_utf8_embedded_nul_iconv (void)
743 {
744 gchar *res;
745 GError *error = NULL;
746
747 setlocale (LC_ALL, "C");
748 g_setenv ("CHARSET", "US-ASCII", TRUE);
749 g_assert_false (g_get_charset (NULL));
750
751 res = g_locale_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
752
753 g_assert_null (res);
754 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
755 g_error_free (error);
756 }
757
758 static void
test_locale_from_utf8_embedded_nul(void)759 test_locale_from_utf8_embedded_nul (void)
760 {
761 g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
762 g_test_trap_assert_passed ();
763 g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
764 g_test_trap_assert_passed ();
765 }
766
767 /* Test that embedded nul characters in input to g_locale_from_utf8(),
768 * when converting (copying) to UTF-8 output, result in an error.
769 */
770 static void
test_locale_from_utf8_embedded_nul_utf8(void)771 test_locale_from_utf8_embedded_nul_utf8 (void)
772 {
773 gchar *res;
774 gsize bytes_read;
775 GError *error = NULL;
776
777 setlocale (LC_ALL, "");
778 g_setenv ("CHARSET", "UTF-8", TRUE);
779 g_assert_true (g_get_charset (NULL));
780
781 res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
782
783 g_assert_null (res);
784 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
785 g_assert_cmpuint (bytes_read, ==, 2);
786 g_error_free (error);
787 }
788
789 /* Test that embedded nul characters in input to g_locale_from_utf8(),
790 * when converting to non-UTF-8 output, result in an error.
791 */
792 static void
test_locale_from_utf8_embedded_nul_iconv(void)793 test_locale_from_utf8_embedded_nul_iconv (void)
794 {
795 gchar *res;
796 gsize bytes_read;
797 GError *error = NULL;
798
799 setlocale (LC_ALL, "C");
800 g_setenv ("CHARSET", "US-ASCII", TRUE);
801 g_assert_false (g_get_charset (NULL));
802
803 res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
804
805 g_assert_null (res);
806 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
807 g_assert_cmpuint (bytes_read, ==, 2);
808 g_error_free (error);
809 }
810
811 static void
test_filename_to_utf8_embedded_nul(void)812 test_filename_to_utf8_embedded_nul (void)
813 {
814 g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
815 g_test_trap_assert_passed ();
816 g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
817 g_test_trap_assert_passed ();
818 }
819
820 /* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()
821 * result in an error.
822 */
823 static void
test_filename_to_utf8_embedded_nul_utf8(void)824 test_filename_to_utf8_embedded_nul_utf8 (void)
825 {
826 gchar *res;
827 gsize bytes_read;
828 GError *error = NULL;
829
830 #ifndef G_OS_WIN32
831 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
832 g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
833 g_assert_true (g_get_filename_charsets (NULL));
834 #endif
835
836 res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
837
838 g_assert_null (res);
839 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
840 g_assert_cmpuint (bytes_read, ==, 2);
841 g_error_free (error);
842 }
843
844 /* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
845 * result in an error.
846 */
847 static void
test_filename_to_utf8_embedded_nul_iconv(void)848 test_filename_to_utf8_embedded_nul_iconv (void)
849 {
850 gchar *res;
851 gsize bytes_read;
852 GError *error = NULL;
853
854 #ifndef G_OS_WIN32
855 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
856 g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
857 g_assert_false (g_get_filename_charsets (NULL));
858 #endif
859
860 res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
861
862 g_assert_null (res);
863 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
864 g_assert_cmpuint (bytes_read, ==, 2);
865 g_error_free (error);
866 }
867
868 static void
test_filename_from_utf8_embedded_nul(void)869 test_filename_from_utf8_embedded_nul (void)
870 {
871 g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
872 g_test_trap_assert_passed ();
873 g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
874 g_test_trap_assert_passed ();
875 }
876
877 /* Test that embedded nul characters in input to g_filename_from_utf8(),
878 * when converting (copying) to UTF-8 output, result in an error.
879 */
880 static void
test_filename_from_utf8_embedded_nul_utf8(void)881 test_filename_from_utf8_embedded_nul_utf8 (void)
882 {
883 gchar *res;
884 gsize bytes_read;
885 GError *error = NULL;
886
887 #ifndef G_OS_WIN32
888 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
889 g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
890 g_assert_true (g_get_filename_charsets (NULL));
891 #endif
892
893 res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
894
895 g_assert_null (res);
896 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
897 g_assert_cmpuint (bytes_read, ==, 2);
898 g_error_free (error);
899 }
900
901 /* Test that embedded nul characters in input to g_filename_from_utf8(),
902 * when converting to non-UTF-8 output, result in an error.
903 */
904 static void
test_filename_from_utf8_embedded_nul_iconv(void)905 test_filename_from_utf8_embedded_nul_iconv (void)
906 {
907 gchar *res;
908 gsize bytes_read;
909 GError *error = NULL;
910
911 #ifndef G_OS_WIN32
912 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
913 g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
914 g_assert_false (g_get_filename_charsets (NULL));
915 #endif
916
917 res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
918
919 g_assert_null (res);
920 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
921 g_assert_cmpuint (bytes_read, ==, 2);
922 g_error_free (error);
923 }
924
925 static void
test_no_conv(void)926 test_no_conv (void)
927 {
928 const gchar *in = "";
929 gchar *out G_GNUC_UNUSED;
930 gsize bytes_read = 0;
931 gsize bytes_written = 0;
932 GError *error = NULL;
933
934 out = g_convert (in, -1, "XXX", "UVZ",
935 &bytes_read, &bytes_written, &error);
936
937 /* error code is unreliable, since we mishandle errno there */
938 g_assert (error && error->domain == G_CONVERT_ERROR);
939 g_error_free (error);
940 }
941
942 int
main(int argc,char * argv[])943 main (int argc, char *argv[])
944 {
945 g_test_init (&argc, &argv, NULL);
946
947 g_test_add_func ("/conversion/no-conv", test_no_conv);
948 g_test_add_func ("/conversion/iconv-state", test_iconv_state);
949 g_test_add_func ("/conversion/illegal-sequence", test_one_half);
950 g_test_add_func ("/conversion/byte-order", test_byte_order);
951 g_test_add_func ("/conversion/unicode", test_unicode_conversions);
952 g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
953 g_test_add_func ("/conversion/filename-display", test_filename_display);
954 g_test_add_func ("/conversion/convert-embedded-nul", test_convert_embedded_nul);
955 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
956 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
957 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
958 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
959 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
960 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
961 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
962 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
963 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
964 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
965 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
966 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
967
968 return g_test_run ();
969 }
970