1 /*
2 * Copyright (C) 2003 Red Hat, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /* The interfaces in this file are subject to change at any time. */
20
21 #include "config.h"
22
23 #include <sys/types.h>
24 #include <errno.h>
25 #include <string.h>
26 #include <glib.h>
27 #include "buffer.h"
28 #include "vteconv.h"
29
30 #ifdef VTE_COMPILATION
31 #include "vte-private.h"
32 #else
33 #define VTE_UTF8_BPC (6) /* Maximum number of bytes used per UTF-8 character */
34 #endif
35
36 typedef size_t (*convert_func)(GIConv converter,
37 const guchar **inbuf,
38 gsize *inbytes_left,
39 guchar **outbuf,
40 gsize *outbytes_left);
41 struct _VteConv {
42 GIConv conv;
43 convert_func convert;
44 gint (*close)(GIConv converter);
45 gboolean in_unichar, out_unichar;
46 VteByteArray *in_scratch, *out_scratch;
47 };
48
49 /* We can't use g_utf8_strlen as that's not nul-safe :( */
50 static gsize
_vte_conv_utf8_strlen(const gchar * p,gssize max)51 _vte_conv_utf8_strlen(const gchar *p, gssize max)
52 {
53 const gchar *q = p + max;
54 gsize length = 0;
55 while (p < q) {
56 p = g_utf8_next_char(p);
57 length++;
58 }
59 return length;
60 }
61
62 /* A variant of g_utf8_validate() that allows NUL characters.
63 * Requires that max_len >= 0 && end != NULL. */
64 static gboolean
_vte_conv_utf8_validate(const gchar * str,gssize max_len,const gchar ** end)65 _vte_conv_utf8_validate(const gchar *str,
66 gssize max_len,
67 const gchar **end)
68 {
69 gboolean ret;
70 do {
71 ret = g_utf8_validate(str, max_len, end);
72 max_len -= *end - str;
73 str = *end;
74 /* Hitting a NUL is okay. Clear the error and iterate over them. */
75 while (max_len > 0 && *str == '\0') {
76 ret = TRUE;
77 max_len--;
78 str++;
79 *end = str;
80 }
81 } while (ret && max_len > 0);
82 return ret;
83 }
84
85 /* A variant of g_utf8_get_char_validated() that allows NUL characters.
86 * Requires that max_len >= 0. */
87 static gunichar
_vte_conv_utf8_get_char_validated(const gchar * p,gssize max_len)88 _vte_conv_utf8_get_char_validated(const gchar *p,
89 gssize max_len) {
90 gunichar ret;
91 /* Handle NUL at the beginning. */
92 if (max_len > 0 && p[0] == '\0')
93 return 0;
94 ret = g_utf8_get_char_validated(p, max_len);
95 /* If a partial match is returned but there's a NUL in the buffer
96 * then this is a wrong error, we're facing an invalid character. */
97 if (ret == (gunichar) -2 && memchr(p, '\0', max_len) != NULL)
98 ret = (gunichar) -1;
99 return ret;
100 }
101
102 /* A bogus UTF-8 to UTF-8 conversion function which attempts to provide the
103 * same semantics as g_iconv(). */
104 static size_t
_vte_conv_utf8_utf8(GIConv converter,const gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)105 _vte_conv_utf8_utf8(GIConv converter,
106 const gchar **inbuf,
107 gsize *inbytes_left,
108 gchar **outbuf,
109 gsize *outbytes_left)
110 {
111 gboolean validated;
112 const gchar *endptr;
113 size_t bytes;
114
115 /* We don't tolerate shenanigans! */
116 g_assert_cmpuint(*outbytes_left, >=, *inbytes_left);
117
118 /* The only error we can throw is EILSEQ, so check for that here. */
119 validated = _vte_conv_utf8_validate(*inbuf, *inbytes_left, &endptr);
120
121 /* Copy whatever data was validated. */
122 bytes = endptr - *inbuf;
123 memcpy(*outbuf, *inbuf, bytes);
124 *inbuf += bytes;
125 *outbuf += bytes;
126 *outbytes_left -= bytes;
127 *inbytes_left -= bytes;
128
129 /* Return 0 (number of non-reversible conversions performed) if everything
130 * looked good, else EILSEQ. */
131 if (validated) {
132 return 0;
133 }
134
135 /* Determine why the end of the string is not valid. */
136 if (_vte_conv_utf8_get_char_validated(*inbuf, *inbytes_left) == (gunichar) -2) {
137 /* Prefix of a valid UTF-8 */
138 errno = EINVAL;
139 } else {
140 /* We had enough bytes to validate the character, and
141 * it failed. It just doesn't look right. */
142 errno = EILSEQ;
143 }
144 return (size_t) -1;
145 }
146
147 /* Open a conversion descriptor which, in addition to normal cases, provides
148 * UTF-8 to UTF-8 conversions and a gunichar-compatible source and target
149 * encoding. */
150 VteConv
_vte_conv_open(const char * target,const char * source)151 _vte_conv_open(const char *target, const char *source)
152 {
153 VteConv ret;
154 GIConv conv;
155 gboolean in_unichar, out_unichar, utf8;
156 const char *real_target, *real_source;
157
158 /* No shenanigans. */
159 g_assert(target != NULL);
160 g_assert(source != NULL);
161 g_assert(strlen(target) > 0);
162 g_assert(strlen(source) > 0);
163
164 /* Assume normal iconv usage. */
165 in_unichar = FALSE;
166 out_unichar = FALSE;
167 real_source = source;
168 real_target = target;
169
170 /* Determine if we need to convert gunichars to UTF-8 on input. */
171 if (strcmp(target, VTE_CONV_GUNICHAR_TYPE) == 0) {
172 real_target = "UTF-8";
173 out_unichar = TRUE;
174 }
175
176 /* Determine if we need to convert UTF-8 to gunichars on output. */
177 if (strcmp(source, VTE_CONV_GUNICHAR_TYPE) == 0) {
178 real_source = "UTF-8";
179 in_unichar = TRUE;
180 }
181
182 /* Determine if this is a UTF-8 to UTF-8 conversion. */
183 utf8 = ((g_ascii_strcasecmp(real_target, "UTF-8") == 0) &&
184 (g_ascii_strcasecmp(real_source, "UTF-8") == 0));
185
186 /* If we're doing UTF-8 to UTF-8, just use a dummy function which
187 * checks for bad data. */
188 conv = NULL;
189 if (!utf8) {
190 char *translit_target = g_strdup_printf ("%s//translit", real_target);
191 conv = g_iconv_open(translit_target, real_source);
192 g_free (translit_target);
193 if (conv == ((GIConv) -1)) {
194 conv = g_iconv_open(real_target, real_source);
195 }
196 if (conv == ((GIConv) -1)) {
197 return VTE_INVALID_CONV;
198 }
199 }
200
201 /* Set up the descriptor. */
202 ret = g_slice_new0(struct _VteConv);
203 if (utf8) {
204 ret->conv = NULL;
205 ret->convert = (convert_func) _vte_conv_utf8_utf8;
206 ret->close = NULL;
207 } else {
208 g_assert((conv != NULL) && (conv != ((GIConv) -1)));
209 ret->conv = conv;
210 ret->convert = (convert_func) g_iconv;
211 ret->close = g_iconv_close;
212 }
213
214 /* Initialize other elements. */
215 ret->in_unichar = in_unichar;
216 ret->out_unichar = out_unichar;
217
218 /* Create scratch buffers. */
219 ret->in_scratch = _vte_byte_array_new();
220 ret->out_scratch = _vte_byte_array_new();
221
222 return ret;
223 }
224
225 gint
_vte_conv_close(VteConv converter)226 _vte_conv_close(VteConv converter)
227 {
228 g_assert(converter != NULL);
229 g_assert(converter != VTE_INVALID_CONV);
230
231 /* Close the underlying descriptor, if there is one. */
232 if (converter->conv != NULL) {
233 g_assert(converter->close != NULL);
234 converter->close(converter->conv);
235 }
236
237 /* Free the scratch buffers. */
238 _vte_byte_array_free(converter->in_scratch);
239 _vte_byte_array_free(converter->out_scratch);
240
241 /* Free the structure itself. */
242 g_slice_free(struct _VteConv, converter);
243
244 return 0;
245 }
246
247 size_t
_vte_conv(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)248 _vte_conv(VteConv converter,
249 const guchar **inbuf, gsize *inbytes_left,
250 guchar **outbuf, gsize *outbytes_left)
251 {
252 size_t ret, tmp;
253 const guchar *work_inbuf_start, *work_inbuf_working;
254 guchar *work_outbuf_start, *work_outbuf_working;
255 gsize work_inbytes, work_outbytes;
256
257 g_assert(converter != NULL);
258 g_assert(converter != VTE_INVALID_CONV);
259
260 work_inbuf_start = work_inbuf_working = *inbuf;
261 work_outbuf_start = work_outbuf_working = *outbuf;
262 work_inbytes = *inbytes_left;
263 work_outbytes = *outbytes_left;
264
265 /* Possibly convert the input data from gunichars to UTF-8. */
266 if (converter->in_unichar) {
267 int i, char_count;
268 guchar *p, *end;
269 gunichar *g;
270 /* Make sure the scratch buffer has enough space. */
271 char_count = *inbytes_left / sizeof(gunichar);
272 _vte_byte_array_set_minimum_size(converter->in_scratch,
273 (char_count + 1) * VTE_UTF8_BPC);
274 /* Convert the incoming text. */
275 g = (gunichar*) *inbuf;
276 p = converter->in_scratch->data;
277 end = p + (char_count + 1) * VTE_UTF8_BPC;
278 for (i = 0; i < char_count; i++) {
279 p += g_unichar_to_utf8(g[i], (gchar *)p);
280 g_assert(p <= end);
281 }
282 /* Update our working pointers. */
283 work_inbuf_start = converter->in_scratch->data;
284 work_inbuf_working = work_inbuf_start;
285 work_inbytes = p - work_inbuf_start;
286 }
287
288 /* Possibly set the output pointers to point at our scratch buffer. */
289 if (converter->out_unichar) {
290 work_outbytes = *outbytes_left * VTE_UTF8_BPC;
291 _vte_byte_array_set_minimum_size(converter->out_scratch,
292 work_outbytes);
293 work_outbuf_start = converter->out_scratch->data;
294 work_outbuf_working = work_outbuf_start;
295 }
296
297 /* Call the underlying conversion. */
298 ret = 0;
299 do {
300 tmp = converter->convert(converter->conv,
301 &work_inbuf_working,
302 &work_inbytes,
303 &work_outbuf_working,
304 &work_outbytes);
305 if (tmp == (size_t) -1) {
306 /* Check for zero bytes, which we pass right through. */
307 if (errno == EILSEQ) {
308 if ((work_inbytes > 0) &&
309 (work_inbuf_working[0] == '\0') &&
310 (work_outbytes > 0)) {
311 work_outbuf_working[0] = '\0';
312 work_outbuf_working++;
313 work_inbuf_working++;
314 work_outbytes--;
315 work_inbytes--;
316 ret++;
317 } else {
318 /* No go. */
319 ret = -1;
320 break;
321 }
322 } else {
323 ret = -1;
324 break;
325 }
326 } else {
327 ret += tmp;
328 break;
329 }
330 } while (work_inbytes > 0);
331
332 /* We can't handle this particular failure, and it should
333 * never happen. (If it does, our caller needs fixing.) */
334 g_assert((ret != (size_t)-1) || (errno != E2BIG));
335
336 /* Possibly convert the output from UTF-8 to gunichars. */
337 if (converter->out_unichar) {
338 int left = *outbytes_left;
339 gunichar *g;
340 gchar *p;
341
342 g = (gunichar*) *outbuf;
343 for(p = (gchar *)work_outbuf_start;
344 p < (gchar *)work_outbuf_working;
345 p = g_utf8_next_char(p)) {
346 g_assert(left>=0);
347 *g++ = g_utf8_get_char(p);
348 left -= sizeof(gunichar);
349 }
350 *outbytes_left = left;
351 *outbuf = (guchar*) g;
352 } else {
353 /* Pass on the output results. */
354 *outbuf = work_outbuf_working;
355 *outbytes_left -= (work_outbuf_working - work_outbuf_start);
356 }
357
358 /* Advance the input pointer to the right place. */
359 if (converter->in_unichar) {
360 /* Get an idea of how many characters were converted, and
361 * advance the pointer as required. */
362 gsize chars;
363 chars = _vte_conv_utf8_strlen((const gchar *)work_inbuf_start,
364 work_inbuf_working - work_inbuf_start);
365 *inbuf += (sizeof(gunichar) * chars);
366 *inbytes_left -= (sizeof(gunichar) * chars);
367 } else {
368 /* Pass on the input results. */
369 *inbuf = work_inbuf_working;
370 *inbytes_left -= (work_inbuf_working - work_inbuf_start);
371 }
372
373 return ret;
374 }
375
376 size_t
_vte_conv_cu(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)377 _vte_conv_cu(VteConv converter,
378 const guchar **inbuf, gsize *inbytes_left,
379 gunichar **outbuf, gsize *outbytes_left)
380 {
381 return _vte_conv(converter,
382 inbuf, inbytes_left,
383 (guchar**)outbuf, outbytes_left);
384 }
385
386 size_t
_vte_conv_uu(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)387 _vte_conv_uu(VteConv converter,
388 const gunichar **inbuf, gsize *inbytes_left,
389 gunichar **outbuf, gsize *outbytes_left)
390 {
391 return _vte_conv(converter,
392 (const guchar**)inbuf, inbytes_left,
393 (guchar**)outbuf, outbytes_left);
394 }
395
396 size_t
_vte_conv_uc(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)397 _vte_conv_uc(VteConv converter,
398 const gunichar **inbuf, gsize *inbytes_left,
399 guchar **outbuf, gsize *outbytes_left)
400 {
401 return _vte_conv(converter,
402 (const guchar**)inbuf, inbytes_left,
403 outbuf, outbytes_left);
404 }
405
406 #ifdef VTECONV_MAIN
407
408 static gsize
ucs4_strlen(gunichar * p,gsize max_len)409 ucs4_strlen(gunichar *p,
410 gsize max_len)
411 {
412 gunichar *q = p + max_len;
413 gsize length = 0;
414 while (p < q && *p++ != 0)
415 length++;
416 return length;
417 }
418 static void
clear(gunichar wide[5],gchar narrow[5])419 clear(gunichar wide[5], gchar narrow[5])
420 {
421 wide[0] = 'T';
422 wide[1] = 'E';
423 wide[2] = 'S';
424 wide[3] = 'T';
425 wide[4] = '\0';
426 strcpy(narrow, "test");
427 }
428
429 static int
mixed_strcmp(gunichar * wide,gchar * narrow)430 mixed_strcmp(gunichar *wide, gchar *narrow)
431 {
432 while (*wide && *narrow) {
433 if (*wide != *narrow) {
434 return -1;
435 }
436 wide++;
437 narrow++;
438 }
439 return 0;
440 }
441
/* Test _vte_conv_utf8_strlen, especially where it differs from g_utf8_strlen:
 * embedded NUL bytes are counted as characters, and only the given number of
 * bytes is examined. */
static void
test_utf8_strlen (void)
{
	/* Empty input. */
	g_assert_cmpuint(_vte_conv_utf8_strlen("", 0), ==, 0);
	/* NUL bytes alone, and mixed with ASCII, each count as one character. */
	g_assert_cmpuint(_vte_conv_utf8_strlen("\0\0\0\0", 4), ==, 4);
	g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0\0", 4), ==, 4);
	g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0B", 4), ==, 4);
	g_assert_cmpuint(_vte_conv_utf8_strlen("A\0B\0", 4), ==, 4);
	/* Plain ASCII; the second case has more data than the length given. */
	g_assert_cmpuint(_vte_conv_utf8_strlen("ABCD", 4), ==, 4);
	g_assert_cmpuint(_vte_conv_utf8_strlen("ABCDE", 4), ==, 4);
	/* Two two-byte sequences make two characters. */
	g_assert_cmpuint(_vte_conv_utf8_strlen("\xC2\xA0\xC2\xA0", 4), ==, 2);
}
455
/* Test _vte_conv_utf8_validate against a table of inputs containing
 * embedded NUL bytes. */
static void
test_utf8_validate (void)
{
	/* input:     bytes to validate (may contain NULs)
	 * ilen:      number of bytes of input to examine
	 * endlen:    expected offset of the returned end pointer
	 * validates: expected return value */
	static const struct {
		char input[16];
		gsize ilen;
		gsize endlen;
		gboolean validates;
	} tests[] = {
		/* NUL bytes only, at several lengths. */
		{ "\0\0\0", 0, 0, TRUE },
		{ "\0\0\0", 1, 1, TRUE },
		{ "\0\0\0", 3, 3, TRUE },

		/* ASCII with single and consecutive NULs in between. */
		{ "ab\0cd\0\0ef", 6, 6, TRUE },
		{ "ab\0cd\0\0ef", 7, 7, TRUE },
		{ "ab\0cd\0\0ef", 9, 9, TRUE },

		/* Three-byte sequences on both sides of a NUL. */
		{ "ab\xE2\x94\x80\0\xE2\x94\x80yz", 11, 11, TRUE },

		/* A stray continuation byte: validation stops in front of it. */
		{ "ab\x80\0cd", 6, 2, FALSE },

		/* A lead byte cut short by a NUL: validation stops in front of it. */
		{ "ab\xE2\0cd", 6, 2, FALSE },
	};
	guint i;
	const char *end;

	for (i = 0; i < G_N_ELEMENTS (tests); i++) {
		g_assert(_vte_conv_utf8_validate(tests[i].input, tests[i].ilen, &end) == tests[i].validates);
		g_assert_cmpuint((gsize)(end - tests[i].input), ==, tests[i].endlen);
	}
}
487
/* Test _vte_conv_utf8_get_char_validated.  In the expectations below,
 * (gunichar)-2 stands for an incomplete sequence and (gunichar)-1 for an
 * invalid one. */
static void
test_utf8_get_char_validated (void)
{
	/* A complete three-byte sequence (decodes to 0x2500). */
	static const char mbyte_test[] = { 0xe2, 0x94, 0x80 };
	/* A lead byte followed by further lead bytes. */
	static const char mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };

	/* No input at all counts as incomplete; a lone NUL decodes to 0. */
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("", 0), ==, (gunichar)-2);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\0", 1), ==, 0);
	/* Prefixes of the valid sequence are incomplete until all bytes are given. */
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 1), ==, (gunichar)-2);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 2), ==, (gunichar)-2);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 3), ==, 0x2500);
	/* The broken sequence is incomplete with one byte and invalid beyond that. */
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 1), ==, (gunichar)-2);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 2), ==, (gunichar)-1);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 3), ==, (gunichar)-1);
	/* A NUL inside the buffer turns "incomplete" into "invalid". */
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\x80\0", 2), ==, (gunichar)-1);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\0", 2), ==, (gunichar)-1);
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\0", 3), ==, (gunichar)-1);
	/* A NUL after a complete character does not affect decoding. */
	g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\x80\0", 4), ==, 0x2500);
}
508
/* One conversion test case shared by the table-driven tests below. */
typedef struct {
	/* Wide (gunichar) form of the text. */
	gunichar wide[8];
	/* Length of wide in bytes; negative means "up to the first zero element". */
	gssize widelen;
	/* Narrow (byte) form of the text. */
	gchar narrow[8];
	/* Length of narrow in bytes; negative means "use strlen()". */
	gssize narrowlen;
	/* Encoding names handed to _vte_conv_open(). */
	char target[16];
	char source[16];
} TestData;
517
518 static void
test_narrow_narrow(TestData * tests,gsize n_tests)519 test_narrow_narrow (TestData *tests,
520 gsize n_tests)
521 {
522 VteConv conv;
523 guchar buf[10];
524 const guchar *inbuf;
525 guchar *outbuf;
526 gsize inbytes, outbytes, ret;
527 gsize i;
528
529 for (i = 0; i < n_tests; i++) {
530 memset(buf, 0, sizeof(buf));
531 inbuf = tests[i].narrow;
532 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
533 outbuf = buf;
534 outbytes = sizeof(buf);
535 conv = _vte_conv_open(tests[i].target, tests[i].source);
536 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
537 g_assert_cmpuint(ret, ==, 0);
538 g_assert_cmpuint(inbytes, ==, 0);
539 g_assert_cmpstr(tests[i].narrow, ==, buf);
540 _vte_conv_close(conv);
541 }
542 }
543
544 static void
test_narrow_to_wide(TestData * tests,gsize n_tests)545 test_narrow_to_wide (TestData *tests,
546 gsize n_tests)
547 {
548 gunichar widebuf[5];
549 VteConv conv;
550 const guchar *inbuf;
551 guchar *outbuf;
552 gsize inbytes, outbytes, ret;
553 gsize i;
554
555 for (i = 0; i < n_tests; i++) {
556 memset(widebuf, 0, sizeof(widebuf));
557 inbuf = tests[i].narrow;
558 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
559 outbuf = (gchar*) widebuf;
560 outbytes = sizeof(widebuf);
561 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, tests[i].source);
562 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
563 g_assert_cmpuint(ret, ==, 0);
564 g_assert_cmpuint(inbytes, ==, 0);
565 g_assert_cmpint(mixed_strcmp(widebuf, tests[i].narrow), ==, 0);
566 _vte_conv_close(conv);
567 }
568 }
569
570 static void
test_wide_to_narrow(TestData * tests,gsize n_tests)571 test_wide_to_narrow (TestData *tests,
572 gsize n_tests)
573 {
574 char buf[10];
575 VteConv conv;
576 const guchar *inbuf;
577 guchar *outbuf;
578 gsize inbytes, outbytes, ret;
579 gsize i;
580
581 for (i = 0; i < n_tests; i++) {
582 memset(buf, 0, sizeof(buf));
583 inbuf = (char*)tests[i].wide;
584 inbytes = tests[i].widelen >= 0 ? tests[i].widelen
585 : ucs4_strlen(tests[i].wide, sizeof(tests[i].wide)) * sizeof(gunichar);
586 outbuf = buf;
587 outbytes = sizeof(buf);
588 conv = _vte_conv_open(tests[i].target, VTE_CONV_GUNICHAR_TYPE);
589 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
590 g_assert_cmpuint(ret, ==, 0);
591 g_assert_cmpuint(inbytes, ==, 0);
592 g_assert_cmpint(mixed_strcmp(tests[i].wide, buf), ==, 0);
593 _vte_conv_close(conv);
594 }
595 }
596
597 static void
test_g_iconv_narrow_narrow(void)598 test_g_iconv_narrow_narrow (void)
599 {
600 static const TestData tests[] = {
601 { { 0, }, -1, "test", -1, "UTF-8", "ISO-8859-1" },
602 { { 0, }, -1, "test", -1, "ISO-8859-1", "UTF-8" },
603 };
604
605 test_narrow_narrow (tests, G_N_ELEMENTS(tests));
606 }
607
608 static void
test_g_iconv_narrow_to_wide(void)609 test_g_iconv_narrow_to_wide (void)
610 {
611 static const TestData tests[] = {
612 { { 0, }, -1, "test", -1, VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1" },
613 };
614
615 test_narrow_to_wide (tests, G_N_ELEMENTS(tests));
616 }
617
618 static void
test_g_iconv_wide_to_narrow(void)619 test_g_iconv_wide_to_narrow (void)
620 {
621 static const TestData tests[] = {
622 { { 'T', 'E', 'S', 'T', 0 }, -1, "", -1, "ISO-8859-1", VTE_CONV_GUNICHAR_TYPE },
623 };
624
625 test_wide_to_narrow (tests, G_N_ELEMENTS(tests));
626 }
627
628 static void
test_utf8_to_utf8(void)629 test_utf8_to_utf8 (void)
630 {
631 static const TestData tests[] = {
632 { { 0, }, -1, "test", -1, "UTF-8", "UTF-8" },
633 };
634
635 test_narrow_narrow (tests, G_N_ELEMENTS (tests));
636 }
637
638 static void
test_zero_byte_passthrough(void)639 test_zero_byte_passthrough (void)
640 {
641 gunichar wide_test[5];
642 gchar narrow_test[5];
643 VteConv conv;
644 const guchar *inbuf;
645 guchar *outbuf;
646 gsize inbytes, outbytes;
647 int i;
648
649 /* Test zero-byte pass-through. */
650 clear(wide_test, narrow_test);
651 memset(wide_test, 0, sizeof(wide_test));
652 inbuf = (gchar*) wide_test;
653 inbytes = 3 * sizeof(gunichar);
654 outbuf = narrow_test;
655 outbytes = sizeof(narrow_test);
656 conv = _vte_conv_open("UTF-8", VTE_CONV_GUNICHAR_TYPE);
657 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
658 g_assert(inbytes == 0);
659 if ((narrow_test[0] != 0) ||
660 (narrow_test[1] != 0) ||
661 (narrow_test[2] != 0)) {
662 g_error("Conversion 6 failed.\n");
663 }
664 _vte_conv_close(conv);
665
666 /* Test zero-byte pass-through. */
667 clear(wide_test, narrow_test);
668 memset(wide_test, 'A', sizeof(wide_test));
669 memset(narrow_test, 0, sizeof(narrow_test));
670 inbuf = narrow_test;
671 inbytes = 3;
672 outbuf = (char*)wide_test;
673 outbytes = sizeof(wide_test);
674 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "UTF-8");
675 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
676 g_assert(inbytes == 0);
677 if ((wide_test[0] != 0) ||
678 (wide_test[1] != 0) ||
679 (wide_test[2] != 0)) {
680 g_error("Conversion 7 failed.\n");
681 }
682 _vte_conv_close(conv);
683
684 /* Test zero-byte pass-through. */
685 clear(wide_test, narrow_test);
686 memset(wide_test, 'A', sizeof(wide_test));
687 memset(narrow_test, 0, sizeof(narrow_test));
688 inbuf = narrow_test;
689 inbytes = 3;
690 outbuf = (char*)wide_test;
691 outbytes = sizeof(wide_test);
692 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1");
693 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
694 g_assert(inbytes == 0);
695 if ((wide_test[0] != 0) ||
696 (wide_test[1] != 0) ||
697 (wide_test[2] != 0)) {
698 g_error("Conversion 8 failed.\n");
699 }
700 _vte_conv_close(conv);
701 }
702
703 static void
test_utf8_to_utf8_error(void)704 test_utf8_to_utf8_error (void)
705 {
706 gchar buf[10];
707 VteConv conv;
708 const guchar *inbuf;
709 guchar *outbuf;
710 gsize inbytes, outbytes;
711 static const char mbyte_test[] = { 0xe2, 0x94, 0x80 };
712 static const char mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };
713 gsize i;
714
715 /* Test UTF-8 to UTF-8 error reporting, valid multibyte. */
716 for (i = 0; i < sizeof(mbyte_test); i++) {
717 int ret;
718 inbuf = mbyte_test;
719 inbytes = i + 1;
720 outbuf = buf;
721 outbytes = sizeof(buf);
722 conv = _vte_conv_open("UTF-8", "UTF-8");
723 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
724 switch (i) {
725 case 0:
726 g_assert_cmpint(ret, ==, -1);
727 g_assert_cmpint(errno, ==, EINVAL);
728 break;
729 case 1:
730 g_assert_cmpint(ret, ==, -1);
731 g_assert_cmpint(errno, ==, EINVAL);
732 break;
733 case 2:
734 g_assert_cmpint(ret, !=, -1);
735 break;
736 default:
737 g_assert_not_reached();
738 break;
739 }
740 _vte_conv_close(conv);
741 }
742
743 /* Test UTF-8 to UTF-8 error reporting, invalid multibyte. */
744 for (i = 0; i < sizeof(mbyte_test_break); i++) {
745 int ret;
746 inbuf = mbyte_test_break;
747 inbytes = i + 1;
748 outbuf = buf;
749 outbytes = sizeof(buf);
750 conv = _vte_conv_open("UTF-8", "UTF-8");
751 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
752 _vte_conv_close(conv);
753 switch (i) {
754 case 0:
755 g_assert_cmpint(ret, ==, -1);
756 g_assert_cmpint(errno, ==, EINVAL);
757 break;
758 case 1:
759 g_assert_cmpint(ret, ==, -1);
760 g_assert_cmpint(errno, ==, EILSEQ);
761 break;
762 case 2:
763 g_assert_cmpint(ret, ==, -1);
764 g_assert_cmpint(errno, ==, EILSEQ);
765 break;
766 default:
767 g_assert_not_reached();
768 break;
769 }
770 }
771 }
772
773 int
main(int argc,char * argv[])774 main (int argc,
775 char *argv[])
776 {
777 g_test_init (&argc, &argv, NULL);
778
779 g_test_add_func ("/vte/conv/utf8/strlen", test_utf8_strlen);
780 g_test_add_func ("/vte/conv/utf8/validate", test_utf8_validate);
781 g_test_add_func ("/vte/conv/utf8/get-char", test_utf8_get_char_validated);
782 g_test_add_func ("/vte/conv/utf8/conversion", test_utf8_to_utf8);
783 g_test_add_func ("/vte/conv/utf8/conversion-with-error", test_utf8_to_utf8_error);
784 g_test_add_func ("/vte/conv/narrow-narrow", test_g_iconv_narrow_narrow);
785 g_test_add_func ("/vte/conv/narrow-to-wide", test_g_iconv_narrow_to_wide);
786 g_test_add_func ("/vte/conv/wide-to-narrow", test_g_iconv_wide_to_narrow);
787 g_test_add_func ("/vte/conv/zero-byte-passthrough", test_zero_byte_passthrough);
788
789 return g_test_run ();
790 }
791 #endif
792