1 /*
2 * Copyright (C) 2003 Red Hat, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /* The interfaces in this file are subject to change at any time. */
20
21 #include "vice.h"
22
23 #include <sys/types.h>
24 #include <errno.h>
25 #include <string.h>
26 #include <glib.h>
27 #include "buffer.h"
28 #include "vteconv.h"
29 #include "vtedefines.hh"
30
/* Signature shared by the two conversion back ends that _vte_conv_open()
 * stores in struct _VteConv: g_iconv() and the local _vte_conv_utf8_utf8()
 * (both are cast to this type there).  _vte_conv() calls it with pointers to
 * its working buffer positions and remaining byte counts, and treats a
 * return value of (size_t) -1 as failure with the reason in errno. */
typedef size_t (*convert_func)(GIConv converter,
                               const guchar **inbuf,
                               gsize *inbytes_left,
                               guchar **outbuf,
                               gsize *outbytes_left);
/* State behind a VteConv handle.  Instances are allocated by
 * _vte_conv_open() and released by _vte_conv_close(). */
struct _VteConv {
        /* Underlying iconv descriptor; NULL for the UTF-8 to UTF-8 case. */
        GIConv conv;
        /* Either g_iconv() or _vte_conv_utf8_utf8(). */
        convert_func convert;
        /* g_iconv_close(), or NULL when there is no descriptor to close. */
        gint (*close)(GIConv converter);
        /* TRUE when the caller's input / output buffer holds gunichars
         * instead of encoded bytes. */
        gboolean in_unichar, out_unichar;
        /* UTF-8 staging buffers used by _vte_conv() for gunichar
         * input / output. */
        VteByteArray *in_scratch, *out_scratch;
};
43
44 /* We can't use g_utf8_strlen as that's not nul-safe :( */
_vte_conv_utf8_strlen(const gchar * p,gssize max)45 static gsize _vte_conv_utf8_strlen(const gchar *p, gssize max)
46 {
47 const gchar *q = p + max;
48 gsize length = 0;
49 while (p < q) {
50 p = g_utf8_next_char(p);
51 length++;
52 }
53 return length;
54 }
55
56 /* A variant of g_utf8_validate() that allows NUL characters.
57 * Requires that max_len >= 0 && end != NULL. */
_vte_conv_utf8_validate(const gchar * str,gssize max_len,const gchar ** end)58 static gboolean _vte_conv_utf8_validate(const gchar *str,
59 gssize max_len,
60 const gchar **end)
61 {
62 gboolean ret;
63 do {
64 ret = g_utf8_validate(str, max_len, end);
65 max_len -= *end - str;
66 str = *end;
67 /* Hitting a NUL is okay. Clear the error and iterate over them. */
68 while (max_len > 0 && *str == '\0') {
69 ret = TRUE;
70 max_len--;
71 str++;
72 *end = str;
73 }
74 } while (ret && max_len > 0);
75 return ret;
76 }
77
78 /* A variant of g_utf8_get_char_validated() that allows NUL characters.
79 * Requires that max_len >= 0. */
_vte_conv_utf8_get_char_validated(const gchar * p,gssize max_len)80 static gunichar _vte_conv_utf8_get_char_validated(const gchar *p, gssize max_len) {
81 gunichar ret;
82 /* Handle NUL at the beginning. */
83 if (max_len > 0 && p[0] == '\0') {
84 return 0;
85 }
86 ret = g_utf8_get_char_validated(p, max_len);
87 /* If a partial match is returned but there's a NUL in the buffer
88 * then this is a wrong error, we're facing an invalid character. */
89 if (ret == (gunichar) -2 && memchr(p, '\0', max_len) != NULL) {
90 ret = (gunichar) -1;
91 }
92 return ret;
93 }
94
95 /* A bogus UTF-8 to UTF-8 conversion function which attempts to provide the
96 * same semantics as g_iconv(). */
_vte_conv_utf8_utf8(GIConv converter,const gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)97 static size_t _vte_conv_utf8_utf8(GIConv converter,
98 const gchar **inbuf,
99 gsize *inbytes_left,
100 gchar **outbuf,
101 gsize *outbytes_left)
102 {
103 gboolean validated;
104 const gchar *endptr;
105 size_t bytes;
106
107 /* We don't tolerate shenanigans! */
108 g_assert_cmpuint(*outbytes_left, >=, *inbytes_left);
109
110 /* The only error we can throw is EILSEQ, so check for that here. */
111 validated = _vte_conv_utf8_validate(*inbuf, *inbytes_left, &endptr);
112
113 /* Copy whatever data was validated. */
114 bytes = endptr - *inbuf;
115 memcpy(*outbuf, *inbuf, bytes);
116 *inbuf += bytes;
117 *outbuf += bytes;
118 *outbytes_left -= bytes;
119 *inbytes_left -= bytes;
120
121 /* Return 0 (number of non-reversible conversions performed) if everything
122 * looked good, else EILSEQ. */
123 if (validated) {
124 return 0;
125 }
126
127 /* Determine why the end of the string is not valid. */
128 if (_vte_conv_utf8_get_char_validated(*inbuf, *inbytes_left) == (gunichar) -2) {
129 /* Prefix of a valid UTF-8 */
130 errno = EINVAL;
131 } else {
132 /* We had enough bytes to validate the character, and
133 * it failed. It just doesn't look right. */
134 errno = EILSEQ;
135 }
136 return (size_t) -1;
137 }
138
139 /* Open a conversion descriptor which, in addition to normal cases, provides
140 * UTF-8 to UTF-8 conversions and a gunichar-compatible source and target
141 * encoding. */
_vte_conv_open(const char * target,const char * source)142 VteConv _vte_conv_open(const char *target, const char *source)
143 {
144 VteConv ret;
145 GIConv conv;
146 gboolean in_unichar, out_unichar, utf8;
147 const char *real_target, *real_source;
148
149 /* No shenanigans. */
150 g_assert(target != NULL);
151 g_assert(source != NULL);
152 g_assert(strlen(target) > 0);
153 g_assert(strlen(source) > 0);
154
155 /* Assume normal iconv usage. */
156 in_unichar = FALSE;
157 out_unichar = FALSE;
158 real_source = source;
159 real_target = target;
160
161 /* Determine if we need to convert gunichars to UTF-8 on input. */
162 if (strcmp(target, VTE_CONV_GUNICHAR_TYPE) == 0) {
163 real_target = "UTF-8";
164 out_unichar = TRUE;
165 }
166
167 /* Determine if we need to convert UTF-8 to gunichars on output. */
168 if (strcmp(source, VTE_CONV_GUNICHAR_TYPE) == 0) {
169 real_source = "UTF-8";
170 in_unichar = TRUE;
171 }
172
173 /* Determine if this is a UTF-8 to UTF-8 conversion. */
174 utf8 = ((g_ascii_strcasecmp(real_target, "UTF-8") == 0) &&
175 (g_ascii_strcasecmp(real_source, "UTF-8") == 0));
176
177 /* If we're doing UTF-8 to UTF-8, just use a dummy function which
178 * checks for bad data. */
179 conv = NULL;
180 if (!utf8) {
181 char *translit_target = g_strdup_printf ("%s//translit", real_target);
182 conv = g_iconv_open(translit_target, real_source);
183 g_free (translit_target);
184 if (conv == ((GIConv) -1)) {
185 conv = g_iconv_open(real_target, real_source);
186 }
187 if (conv == ((GIConv) -1)) {
188 return VTE_INVALID_CONV;
189 }
190 }
191
192 /* Set up the descriptor. */
193 ret = g_slice_new0(struct _VteConv);
194 if (utf8) {
195 ret->conv = NULL;
196 ret->convert = (convert_func) _vte_conv_utf8_utf8;
197 ret->close = NULL;
198 } else {
199 g_assert((conv != NULL) && (conv != ((GIConv) -1)));
200 ret->conv = conv;
201 ret->convert = (convert_func) g_iconv;
202 ret->close = g_iconv_close;
203 }
204
205 /* Initialize other elements. */
206 ret->in_unichar = in_unichar;
207 ret->out_unichar = out_unichar;
208
209 /* Create scratch buffers. */
210 ret->in_scratch = _vte_byte_array_new();
211 ret->out_scratch = _vte_byte_array_new();
212
213 return ret;
214 }
215
_vte_conv_close(VteConv converter)216 gint _vte_conv_close(VteConv converter)
217 {
218 g_assert(converter != NULL);
219 g_assert(converter != VTE_INVALID_CONV);
220
221 /* Close the underlying descriptor, if there is one. */
222 if (converter->conv != NULL) {
223 g_assert(converter->close != NULL);
224 converter->close(converter->conv);
225 }
226
227 /* Free the scratch buffers. */
228 _vte_byte_array_free(converter->in_scratch);
229 _vte_byte_array_free(converter->out_scratch);
230
231 /* Free the structure itself. */
232 g_slice_free(struct _VteConv, converter);
233
234 return 0;
235 }
236
_vte_conv(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)237 size_t _vte_conv(VteConv converter,
238 const guchar **inbuf, gsize *inbytes_left,
239 guchar **outbuf, gsize *outbytes_left)
240 {
241 size_t ret, tmp;
242 const guchar *work_inbuf_start, *work_inbuf_working;
243 guchar *work_outbuf_start, *work_outbuf_working;
244 gsize work_inbytes, work_outbytes;
245
246 g_assert(converter != NULL);
247 g_assert(converter != VTE_INVALID_CONV);
248
249 work_inbuf_start = work_inbuf_working = *inbuf;
250 work_outbuf_start = work_outbuf_working = *outbuf;
251 work_inbytes = *inbytes_left;
252 work_outbytes = *outbytes_left;
253
254 /* Possibly convert the input data from gunichars to UTF-8. */
255 if (converter->in_unichar) {
256 int i, char_count;
257 guchar *p, *end;
258 gunichar *g;
259
260 /* Make sure the scratch buffer has enough space. */
261 char_count = *inbytes_left / sizeof(gunichar);
262 _vte_byte_array_set_minimum_size(converter->in_scratch, (char_count + 1) * VTE_UTF8_BPC);
263
264 /* Convert the incoming text. */
265 g = (gunichar*) *inbuf;
266 p = converter->in_scratch->data;
267 end = p + (char_count + 1) * VTE_UTF8_BPC;
268 for (i = 0; i < char_count; i++) {
269 p += g_unichar_to_utf8(g[i], (gchar *)p);
270 g_assert(p <= end);
271 }
272 /* Update our working pointers. */
273 work_inbuf_start = converter->in_scratch->data;
274 work_inbuf_working = work_inbuf_start;
275 work_inbytes = p - work_inbuf_start;
276 }
277
278 /* Possibly set the output pointers to point at our scratch buffer. */
279 if (converter->out_unichar) {
280 work_outbytes = *outbytes_left * VTE_UTF8_BPC;
281 _vte_byte_array_set_minimum_size(converter->out_scratch, work_outbytes);
282 work_outbuf_start = converter->out_scratch->data;
283 work_outbuf_working = work_outbuf_start;
284 }
285
286 /* Call the underlying conversion. */
287 ret = 0;
288 do {
289 tmp = converter->convert(converter->conv,
290 &work_inbuf_working,
291 &work_inbytes,
292 &work_outbuf_working,
293 &work_outbytes);
294 if (tmp == (size_t) -1) {
295 /* Check for zero bytes, which we pass right through. */
296 if (errno == EILSEQ) {
297 if ((work_inbytes > 0) &&
298 (work_inbuf_working[0] == '\0') &&
299 (work_outbytes > 0)) {
300 work_outbuf_working[0] = '\0';
301 work_outbuf_working++;
302 work_inbuf_working++;
303 work_outbytes--;
304 work_inbytes--;
305 ret++;
306 } else {
307 /* No go. */
308 ret = -1;
309 break;
310 }
311 } else {
312 ret = -1;
313 break;
314 }
315 } else {
316 ret += tmp;
317 break;
318 }
319 } while (work_inbytes > 0);
320
321 /* We can't handle this particular failure, and it should
322 * never happen. (If it does, our caller needs fixing.) */
323 g_assert((ret != (size_t)-1) || (errno != E2BIG));
324
325 /* Possibly convert the output from UTF-8 to gunichars. */
326 if (converter->out_unichar) {
327 int left = *outbytes_left;
328 gunichar *g;
329 gchar *p;
330
331 g = (gunichar*) *outbuf;
332 for(p = (gchar *)work_outbuf_start; p < (gchar *)work_outbuf_working; p = g_utf8_next_char(p)) {
333 g_assert(left>=0);
334 *g++ = g_utf8_get_char(p);
335 left -= sizeof(gunichar);
336 }
337 *outbytes_left = left;
338 *outbuf = (guchar*) g;
339 } else {
340 /* Pass on the output results. */
341 *outbuf = work_outbuf_working;
342 *outbytes_left -= (work_outbuf_working - work_outbuf_start);
343 }
344
345 /* Advance the input pointer to the right place. */
346 if (converter->in_unichar) {
347 /* Get an idea of how many characters were converted, and
348 * advance the pointer as required. */
349 gsize chars;
350 chars = _vte_conv_utf8_strlen((const gchar *)work_inbuf_start,
351 work_inbuf_working - work_inbuf_start);
352 *inbuf += (sizeof(gunichar) * chars);
353 *inbytes_left -= (sizeof(gunichar) * chars);
354 } else {
355 /* Pass on the input results. */
356 *inbuf = work_inbuf_working;
357 *inbytes_left -= (work_inbuf_working - work_inbuf_start);
358 }
359
360 return ret;
361 }
362
_vte_conv_cu(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)363 size_t _vte_conv_cu(VteConv converter, const guchar **inbuf, gsize *inbytes_left,
364 gunichar **outbuf, gsize *outbytes_left)
365 {
366 return _vte_conv(converter, inbuf, inbytes_left, (guchar**)outbuf, outbytes_left);
367 }
368
_vte_conv_uu(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)369 size_t _vte_conv_uu(VteConv converter, const gunichar **inbuf, gsize *inbytes_left,
370 gunichar **outbuf, gsize *outbytes_left)
371 {
372 return _vte_conv(converter, (const guchar**)inbuf, inbytes_left, (guchar**)outbuf, outbytes_left);
373 }
374
_vte_conv_uc(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)375 size_t _vte_conv_uc(VteConv converter, const gunichar **inbuf, gsize *inbytes_left, guchar **outbuf, gsize *outbytes_left)
376 {
377 return _vte_conv(converter, (const guchar**)inbuf, inbytes_left, outbuf, outbytes_left);
378 }
379
380 #ifdef VTECONV_MAIN
381
ucs4_strlen(const gunichar * p,gsize max_len)382 static gsize ucs4_strlen(const gunichar *p, gsize max_len)
383 {
384 const gunichar *q = p + max_len;
385 gsize length = 0;
386 while (p < q && *p++ != 0) {
387 length++;
388 }
389 return length;
390 }
clear(gunichar wide[5],gchar narrow[5])391 static void clear(gunichar wide[5], gchar narrow[5])
392 {
393 wide[0] = 'T';
394 wide[1] = 'E';
395 wide[2] = 'S';
396 wide[3] = 'T';
397 wide[4] = '\0';
398 strcpy(narrow, "test");
399 }
400
mixed_strcmp(const gunichar * wide,const guchar * narrow)401 static int mixed_strcmp(const gunichar *wide, const guchar *narrow)
402 {
403 while (*wide && *narrow) {
404 if (*wide != (gunichar)*narrow) {
405 return -1;
406 }
407 wide++;
408 narrow++;
409 }
410 return 0;
411 }
412
413 /* Test _vte_conv_utf8_strlen, especially where it differs from g_utf8_strlen. */
test_utf8_strlen(void)414 static void test_utf8_strlen (void)
415 {
416 g_assert_cmpuint(_vte_conv_utf8_strlen("", 0), ==, 0);
417 g_assert_cmpuint(_vte_conv_utf8_strlen("\0\0\0\0", 4), ==, 4);
418 g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0\0", 4), ==, 4);
419 g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0B", 4), ==, 4);
420 g_assert_cmpuint(_vte_conv_utf8_strlen("A\0B\0", 4), ==, 4);
421 g_assert_cmpuint(_vte_conv_utf8_strlen("ABCD", 4), ==, 4);
422 g_assert_cmpuint(_vte_conv_utf8_strlen("ABCDE", 4), ==, 4);
423 g_assert_cmpuint(_vte_conv_utf8_strlen("\xC2\xA0\xC2\xA0", 4), ==, 2);
424 }
425
test_utf8_validate(void)426 static void test_utf8_validate (void)
427 {
428 static const struct {
429 char input[16];
430 gsize ilen;
431 gsize endlen;
432 gboolean validates;
433 } tests[] = {
434 { "\0\0\0", 0, 0, TRUE },
435 { "\0\0\0", 1, 1, TRUE },
436 { "\0\0\0", 3, 3, TRUE },
437
438 { "ab\0cd\0\0ef", 6, 6, TRUE },
439 { "ab\0cd\0\0ef", 7, 7, TRUE },
440 { "ab\0cd\0\0ef", 9, 9, TRUE },
441
442 { "ab\xE2\x94\x80\0\xE2\x94\x80yz", 11, 11, TRUE },
443
444 { "ab\x80\0cd", 6, 2, FALSE },
445
446 { "ab\xE2\0cd", 6, 2, FALSE },
447 };
448 guint i;
449 const char *end;
450
451 for (i = 0; i < G_N_ELEMENTS (tests); i++) {
452 g_assert(_vte_conv_utf8_validate(tests[i].input, tests[i].ilen, &end) == tests[i].validates);
453 g_assert_cmpuint((gsize)(end - tests[i].input), ==, tests[i].endlen);
454 }
455 }
456
457 /* Test _vte_conv_utf8_get_char_validated. */
test_utf8_get_char_validated(void)458 static void test_utf8_get_char_validated (void)
459 {
460 static const guchar mbyte_test_u[] = { 0xe2, 0x94, 0x80 };
461 static const guchar mbyte_test_break_u[] = { 0xe2, 0xe2, 0xe2 };
462 const char *mbyte_test = (const char *)mbyte_test_u;
463 const char *mbyte_test_break = (const char *)mbyte_test_break_u;
464
465 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("", 0), ==, (gunichar)-2);
466 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\0", 1), ==, 0);
467 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 1), ==, (gunichar)-2);
468 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 2), ==, (gunichar)-2);
469 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 3), ==, 0x2500);
470 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 1), ==, (gunichar)-2);
471 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 2), ==, (gunichar)-1);
472 g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 3), ==, (gunichar)-1);
473 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\x80\0", 2), ==, (gunichar)-1);
474 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\0", 2), ==, (gunichar)-1);
475 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\0", 3), ==, (gunichar)-1);
476 g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\x80\0", 4), ==, 0x2500);
477 }
478
/* One conversion test case.  The tables below use positional initializers,
 * so the field order matters. */
typedef struct {
        gunichar wide[8];   /* gunichar form of the text, zero-terminated */
        gssize widelen;     /* byte length of wide, or negative to measure it with ucs4_strlen() */
        gchar narrow[8];    /* byte form of the text, NUL-terminated */
        gssize narrowlen;   /* byte length of narrow, or negative to measure it with strlen() */
        char target[16];    /* target encoding name passed to _vte_conv_open() */
        char source[16];    /* source encoding name passed to _vte_conv_open() */
} TestData;
487
test_narrow_narrow(const TestData * tests,gsize n_tests)488 static void test_narrow_narrow (const TestData *tests, gsize n_tests)
489 {
490 VteConv conv;
491 guchar buf[10];
492 const guchar *inbuf;
493 guchar *outbuf;
494 gsize inbytes, outbytes, ret;
495 gsize i;
496
497 for (i = 0; i < n_tests; i++) {
498 memset(buf, 0, sizeof(buf));
499 inbuf = (const guchar *)tests[i].narrow;
500 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
501 outbuf = buf;
502 outbytes = sizeof(buf);
503 conv = _vte_conv_open(tests[i].target, tests[i].source);
504 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
505 g_assert_cmpuint(ret, ==, 0);
506 g_assert_cmpuint(inbytes, ==, 0);
507 g_assert_cmpstr(tests[i].narrow, ==, (char *)buf);
508 _vte_conv_close(conv);
509 }
510 }
511
test_narrow_to_wide(const TestData * tests,gsize n_tests)512 static void test_narrow_to_wide (const TestData *tests, gsize n_tests)
513 {
514 gunichar widebuf[5];
515 VteConv conv;
516 const guchar *inbuf;
517 guchar *outbuf;
518 gsize inbytes, outbytes, ret;
519 gsize i;
520
521 for (i = 0; i < n_tests; i++) {
522 memset(widebuf, 0, sizeof(widebuf));
523 inbuf = (const guchar *)tests[i].narrow;
524 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
525 outbuf = (guchar*) widebuf;
526 outbytes = sizeof(widebuf);
527 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, tests[i].source);
528 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
529 g_assert_cmpuint(ret, ==, 0);
530 g_assert_cmpuint(inbytes, ==, 0);
531 g_assert_cmpint(mixed_strcmp(widebuf, inbuf), ==, 0);
532 _vte_conv_close(conv);
533 }
534 }
535
test_wide_to_narrow(const TestData * tests,gsize n_tests)536 static void test_wide_to_narrow (const TestData *tests, gsize n_tests)
537 {
538 char buf[10];
539 VteConv conv;
540 const guchar *inbuf;
541 guchar *outbuf;
542 gsize inbytes, outbytes, ret;
543 gsize i;
544
545 for (i = 0; i < n_tests; i++) {
546 memset(buf, 0, sizeof(buf));
547 inbuf = (const guchar *)tests[i].wide;
548 inbytes = tests[i].widelen >= 0 ? tests[i].widelen
549 : ucs4_strlen(tests[i].wide, sizeof(tests[i].wide)) * sizeof(gunichar);
550 outbuf = (guchar *)buf;
551 outbytes = sizeof(buf);
552 conv = _vte_conv_open(tests[i].target, VTE_CONV_GUNICHAR_TYPE);
553 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
554 g_assert_cmpuint(ret, ==, 0);
555 g_assert_cmpuint(inbytes, ==, 0);
556 g_assert_cmpint(mixed_strcmp(tests[i].wide, outbuf), ==, 0);
557 _vte_conv_close(conv);
558 }
559 }
560
test_g_iconv_narrow_narrow(void)561 static void test_g_iconv_narrow_narrow (void)
562 {
563 static const TestData tests[] = {
564 { { 0, }, -1, "test", -1, "UTF-8", "ISO-8859-1" },
565 { { 0, }, -1, "test", -1, "ISO-8859-1", "UTF-8" },
566 };
567
568 test_narrow_narrow (tests, G_N_ELEMENTS(tests));
569 }
570
test_g_iconv_narrow_to_wide(void)571 static void test_g_iconv_narrow_to_wide (void)
572 {
573 static const TestData tests[] = {
574 { { 0, }, -1, "test", -1, VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1" },
575 };
576
577 test_narrow_to_wide (tests, G_N_ELEMENTS(tests));
578 }
579
test_g_iconv_wide_to_narrow(void)580 static void test_g_iconv_wide_to_narrow (void)
581 {
582 static const TestData tests[] = {
583 { { 'T', 'E', 'S', 'T', 0 }, -1, "", -1, "ISO-8859-1", VTE_CONV_GUNICHAR_TYPE },
584 };
585
586 test_wide_to_narrow (tests, G_N_ELEMENTS(tests));
587 }
588
test_utf8_to_utf8(void)589 static void test_utf8_to_utf8 (void)
590 {
591 static const TestData tests[] = {
592 { { 0, }, -1, "test", -1, "UTF-8", "UTF-8" },
593 };
594
595 test_narrow_narrow (tests, G_N_ELEMENTS (tests));
596 }
597
test_zero_byte_passthrough(void)598 static void test_zero_byte_passthrough (void)
599 {
600 gunichar wide_test[5];
601 gchar narrow_test[5];
602 VteConv conv;
603 const guchar *inbuf;
604 guchar *outbuf;
605 gsize inbytes, outbytes;
606 int i;
607
608 /* Test zero-byte pass-through. */
609 clear(wide_test, narrow_test);
610 memset(wide_test, 0, sizeof(wide_test));
611 inbuf = (guchar *)wide_test;
612 inbytes = 3 * sizeof(gunichar);
613 outbuf = (guchar *)narrow_test;
614 outbytes = sizeof(narrow_test);
615 conv = _vte_conv_open("UTF-8", VTE_CONV_GUNICHAR_TYPE);
616 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
617 g_assert(inbytes == 0);
618 if ((narrow_test[0] != 0) ||
619 (narrow_test[1] != 0) ||
620 (narrow_test[2] != 0)) {
621 g_error("Conversion 6 failed.\n");
622 }
623 _vte_conv_close(conv);
624
625 /* Test zero-byte pass-through. */
626 clear(wide_test, narrow_test);
627 memset(wide_test, 'A', sizeof(wide_test));
628 memset(narrow_test, 0, sizeof(narrow_test));
629 inbuf = (guchar *)narrow_test;
630 inbytes = 3;
631 outbuf = (guchar *)wide_test;
632 outbytes = sizeof(wide_test);
633 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "UTF-8");
634 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
635 g_assert(inbytes == 0);
636 if ((wide_test[0] != 0) ||
637 (wide_test[1] != 0) ||
638 (wide_test[2] != 0)) {
639 g_error("Conversion 7 failed.\n");
640 }
641 _vte_conv_close(conv);
642
643 /* Test zero-byte pass-through. */
644 clear(wide_test, narrow_test);
645 memset(wide_test, 'A', sizeof(wide_test));
646 memset(narrow_test, 0, sizeof(narrow_test));
647 inbuf = (guchar *)narrow_test;
648 inbytes = 3;
649 outbuf = (guchar *)wide_test;
650 outbytes = sizeof(wide_test);
651 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1");
652 i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
653 g_assert(inbytes == 0);
654 if ((wide_test[0] != 0) ||
655 (wide_test[1] != 0) ||
656 (wide_test[2] != 0)) {
657 g_error("Conversion 8 failed.\n");
658 }
659 _vte_conv_close(conv);
660 }
661
test_utf8_to_utf8_error(void)662 static void test_utf8_to_utf8_error (void)
663 {
664 static const guchar mbyte_test[] = { 0xe2, 0x94, 0x80 };
665 static const guchar mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };
666 gchar buf[10];
667 VteConv conv;
668 const guchar *inbuf;
669 guchar *outbuf;
670 gsize inbytes, outbytes;
671 gsize i;
672
673 /* Test UTF-8 to UTF-8 error reporting, valid multibyte. */
674 for (i = 0; i < sizeof(mbyte_test); i++) {
675 int ret;
676 inbuf = mbyte_test;
677 inbytes = i + 1;
678 outbuf = (guchar *)buf;
679 outbytes = sizeof(buf);
680 conv = _vte_conv_open("UTF-8", "UTF-8");
681 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
682 switch (i) {
683 case 0:
684 g_assert_cmpint(ret, ==, -1);
685 g_assert_cmpint(errno, ==, EINVAL);
686 break;
687 case 1:
688 g_assert_cmpint(ret, ==, -1);
689 g_assert_cmpint(errno, ==, EINVAL);
690 break;
691 case 2:
692 g_assert_cmpint(ret, !=, -1);
693 break;
694 default:
695 g_assert_not_reached();
696 break;
697 }
698 _vte_conv_close(conv);
699 }
700
701 /* Test UTF-8 to UTF-8 error reporting, invalid multibyte. */
702 for (i = 0; i < sizeof(mbyte_test_break); i++) {
703 int ret;
704 inbuf = mbyte_test_break;
705 inbytes = i + 1;
706 outbuf = (guchar *)buf;
707 outbytes = sizeof(buf);
708 conv = _vte_conv_open("UTF-8", "UTF-8");
709 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
710 _vte_conv_close(conv);
711 switch (i) {
712 case 0:
713 g_assert_cmpint(ret, ==, -1);
714 g_assert_cmpint(errno, ==, EINVAL);
715 break;
716 case 1:
717 g_assert_cmpint(ret, ==, -1);
718 g_assert_cmpint(errno, ==, EILSEQ);
719 break;
720 case 2:
721 g_assert_cmpint(ret, ==, -1);
722 g_assert_cmpint(errno, ==, EILSEQ);
723 break;
724 default:
725 g_assert_not_reached();
726 break;
727 }
728 }
729 }
730
main(int argc,char * argv[])731 int main (int argc, char *argv[])
732 {
733 g_test_init (&argc, &argv, nullptr);
734
735 g_test_add_func ("/vte/conv/utf8/strlen", test_utf8_strlen);
736 g_test_add_func ("/vte/conv/utf8/validate", test_utf8_validate);
737 g_test_add_func ("/vte/conv/utf8/get-char", test_utf8_get_char_validated);
738 g_test_add_func ("/vte/conv/utf8/conversion", test_utf8_to_utf8);
739 g_test_add_func ("/vte/conv/utf8/conversion-with-error", test_utf8_to_utf8_error);
740 g_test_add_func ("/vte/conv/narrow-narrow", test_g_iconv_narrow_narrow);
741 g_test_add_func ("/vte/conv/narrow-to-wide", test_g_iconv_narrow_to_wide);
742 g_test_add_func ("/vte/conv/wide-to-narrow", test_g_iconv_wide_to_narrow);
743 g_test_add_func ("/vte/conv/zero-byte-passthrough", test_zero_byte_passthrough);
744
745 return g_test_run ();
746 }
747 #endif
748