1 /*
2  * Copyright (C) 2003 Red Hat, Inc.
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 /* The interfaces in this file are subject to change at any time. */
20 
21 #include "vice.h"
22 
23 #include <sys/types.h>
24 #include <errno.h>
25 #include <string.h>
26 #include <glib.h>
27 #include "buffer.h"
28 #include "vteconv.h"
29 #include "vtedefines.hh"
30 
31 typedef size_t (*convert_func)(GIConv converter,
32                                 const guchar **inbuf,
33                                 gsize *inbytes_left,
34                                 guchar **outbuf,
35                                 gsize *outbytes_left);
36 struct _VteConv {
37     GIConv conv;
38     convert_func convert;
39     gint (*close)(GIConv converter);
40     gboolean in_unichar, out_unichar;
41     VteByteArray *in_scratch, *out_scratch;
42 };
43 
44 /* We can't use g_utf8_strlen as that's not nul-safe :( */
_vte_conv_utf8_strlen(const gchar * p,gssize max)45 static gsize _vte_conv_utf8_strlen(const gchar *p, gssize max)
46 {
47     const gchar *q = p + max;
48     gsize length = 0;
49     while (p < q) {
50         p = g_utf8_next_char(p);
51         length++;
52     }
53     return length;
54 }
55 
56 /* A variant of g_utf8_validate() that allows NUL characters.
57  * Requires that max_len >= 0 && end != NULL. */
_vte_conv_utf8_validate(const gchar * str,gssize max_len,const gchar ** end)58 static gboolean _vte_conv_utf8_validate(const gchar *str,
59                                         gssize max_len,
60                                         const gchar **end)
61 {
62     gboolean ret;
63     do {
64         ret = g_utf8_validate(str, max_len, end);
65         max_len -= *end - str;
66         str = *end;
67         /* Hitting a NUL is okay. Clear the error and iterate over them. */
68         while (max_len > 0 && *str == '\0') {
69             ret = TRUE;
70             max_len--;
71             str++;
72             *end = str;
73         }
74     } while (ret && max_len > 0);
75     return ret;
76 }
77 
78 /* A variant of g_utf8_get_char_validated() that allows NUL characters.
79  * Requires that max_len >= 0. */
_vte_conv_utf8_get_char_validated(const gchar * p,gssize max_len)80 static gunichar _vte_conv_utf8_get_char_validated(const gchar *p, gssize max_len) {
81     gunichar ret;
82     /* Handle NUL at the beginning. */
83     if (max_len > 0 && p[0] == '\0') {
84         return 0;
85     }
86     ret = g_utf8_get_char_validated(p, max_len);
87     /* If a partial match is returned but there's a NUL in the buffer
88         * then this is a wrong error, we're facing an invalid character. */
89     if (ret == (gunichar) -2 && memchr(p, '\0', max_len) != NULL) {
90         ret = (gunichar) -1;
91     }
92     return ret;
93 }
94 
95 /* A bogus UTF-8 to UTF-8 conversion function which attempts to provide the
96  * same semantics as g_iconv(). */
_vte_conv_utf8_utf8(GIConv converter,const gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)97 static size_t _vte_conv_utf8_utf8(GIConv converter,
98                                     const gchar **inbuf,
99                                     gsize *inbytes_left,
100                                     gchar **outbuf,
101                                     gsize *outbytes_left)
102 {
103     gboolean validated;
104     const gchar *endptr;
105     size_t bytes;
106 
107     /* We don't tolerate shenanigans! */
108     g_assert_cmpuint(*outbytes_left, >=, *inbytes_left);
109 
110     /* The only error we can throw is EILSEQ, so check for that here. */
111     validated = _vte_conv_utf8_validate(*inbuf, *inbytes_left, &endptr);
112 
113     /* Copy whatever data was validated. */
114     bytes = endptr - *inbuf;
115     memcpy(*outbuf, *inbuf, bytes);
116     *inbuf += bytes;
117     *outbuf += bytes;
118     *outbytes_left -= bytes;
119     *inbytes_left -= bytes;
120 
121     /* Return 0 (number of non-reversible conversions performed) if everything
122      * looked good, else EILSEQ. */
123     if (validated) {
124         return 0;
125     }
126 
127     /* Determine why the end of the string is not valid. */
128     if (_vte_conv_utf8_get_char_validated(*inbuf, *inbytes_left) == (gunichar) -2) {
129         /* Prefix of a valid UTF-8 */
130         errno = EINVAL;
131     } else {
132         /* We had enough bytes to validate the character, and
133          * it failed.  It just doesn't look right. */
134         errno = EILSEQ;
135     }
136     return (size_t) -1;
137 }
138 
139 /* Open a conversion descriptor which, in addition to normal cases, provides
140  * UTF-8 to UTF-8 conversions and a gunichar-compatible source and target
141  * encoding. */
_vte_conv_open(const char * target,const char * source)142 VteConv _vte_conv_open(const char *target, const char *source)
143 {
144     VteConv ret;
145     GIConv conv;
146     gboolean in_unichar, out_unichar, utf8;
147     const char *real_target, *real_source;
148 
149     /* No shenanigans. */
150     g_assert(target != NULL);
151     g_assert(source != NULL);
152     g_assert(strlen(target) > 0);
153     g_assert(strlen(source) > 0);
154 
155     /* Assume normal iconv usage. */
156     in_unichar = FALSE;
157     out_unichar = FALSE;
158     real_source = source;
159     real_target = target;
160 
161     /* Determine if we need to convert gunichars to UTF-8 on input. */
162     if (strcmp(target, VTE_CONV_GUNICHAR_TYPE) == 0) {
163         real_target = "UTF-8";
164         out_unichar = TRUE;
165     }
166 
167     /* Determine if we need to convert UTF-8 to gunichars on output. */
168     if (strcmp(source, VTE_CONV_GUNICHAR_TYPE) == 0) {
169         real_source = "UTF-8";
170         in_unichar = TRUE;
171     }
172 
173     /* Determine if this is a UTF-8 to UTF-8 conversion. */
174     utf8 = ((g_ascii_strcasecmp(real_target, "UTF-8") == 0) &&
175             (g_ascii_strcasecmp(real_source, "UTF-8") == 0));
176 
177     /* If we're doing UTF-8 to UTF-8, just use a dummy function which
178      * checks for bad data. */
179     conv = NULL;
180     if (!utf8) {
181         char *translit_target = g_strdup_printf ("%s//translit", real_target);
182         conv = g_iconv_open(translit_target, real_source);
183         g_free (translit_target);
184         if (conv == ((GIConv) -1)) {
185             conv = g_iconv_open(real_target, real_source);
186         }
187         if (conv == ((GIConv) -1)) {
188             return VTE_INVALID_CONV;
189         }
190     }
191 
192     /* Set up the descriptor. */
193     ret = g_slice_new0(struct _VteConv);
194     if (utf8) {
195         ret->conv = NULL;
196         ret->convert = (convert_func) _vte_conv_utf8_utf8;
197         ret->close = NULL;
198     } else {
199         g_assert((conv != NULL) && (conv != ((GIConv) -1)));
200         ret->conv = conv;
201         ret->convert = (convert_func) g_iconv;
202         ret->close = g_iconv_close;
203     }
204 
205     /* Initialize other elements. */
206     ret->in_unichar = in_unichar;
207     ret->out_unichar = out_unichar;
208 
209     /* Create scratch buffers. */
210     ret->in_scratch = _vte_byte_array_new();
211     ret->out_scratch = _vte_byte_array_new();
212 
213     return ret;
214 }
215 
_vte_conv_close(VteConv converter)216 gint _vte_conv_close(VteConv converter)
217 {
218     g_assert(converter != NULL);
219     g_assert(converter != VTE_INVALID_CONV);
220 
221     /* Close the underlying descriptor, if there is one. */
222     if (converter->conv != NULL) {
223         g_assert(converter->close != NULL);
224         converter->close(converter->conv);
225     }
226 
227     /* Free the scratch buffers. */
228     _vte_byte_array_free(converter->in_scratch);
229     _vte_byte_array_free(converter->out_scratch);
230 
231     /* Free the structure itself. */
232     g_slice_free(struct _VteConv, converter);
233 
234     return 0;
235 }
236 
_vte_conv(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)237 size_t _vte_conv(VteConv converter,
238       const guchar **inbuf, gsize *inbytes_left,
239       guchar **outbuf, gsize *outbytes_left)
240 {
241     size_t ret, tmp;
242     const guchar *work_inbuf_start, *work_inbuf_working;
243     guchar *work_outbuf_start, *work_outbuf_working;
244     gsize work_inbytes, work_outbytes;
245 
246     g_assert(converter != NULL);
247     g_assert(converter != VTE_INVALID_CONV);
248 
249     work_inbuf_start = work_inbuf_working = *inbuf;
250     work_outbuf_start = work_outbuf_working = *outbuf;
251     work_inbytes = *inbytes_left;
252     work_outbytes = *outbytes_left;
253 
254     /* Possibly convert the input data from gunichars to UTF-8. */
255     if (converter->in_unichar) {
256         int i, char_count;
257         guchar *p, *end;
258         gunichar *g;
259 
260         /* Make sure the scratch buffer has enough space. */
261         char_count = *inbytes_left / sizeof(gunichar);
262         _vte_byte_array_set_minimum_size(converter->in_scratch, (char_count + 1) * VTE_UTF8_BPC);
263 
264         /* Convert the incoming text. */
265         g = (gunichar*) *inbuf;
266         p = converter->in_scratch->data;
267         end = p + (char_count + 1) * VTE_UTF8_BPC;
268         for (i = 0; i < char_count; i++) {
269             p += g_unichar_to_utf8(g[i], (gchar *)p);
270             g_assert(p <= end);
271         }
272         /* Update our working pointers. */
273         work_inbuf_start = converter->in_scratch->data;
274         work_inbuf_working = work_inbuf_start;
275         work_inbytes = p - work_inbuf_start;
276     }
277 
278     /* Possibly set the output pointers to point at our scratch buffer. */
279     if (converter->out_unichar) {
280         work_outbytes = *outbytes_left * VTE_UTF8_BPC;
281         _vte_byte_array_set_minimum_size(converter->out_scratch, work_outbytes);
282         work_outbuf_start = converter->out_scratch->data;
283         work_outbuf_working = work_outbuf_start;
284     }
285 
286     /* Call the underlying conversion. */
287     ret = 0;
288     do {
289         tmp = converter->convert(converter->conv,
290                                     &work_inbuf_working,
291                                     &work_inbytes,
292                                     &work_outbuf_working,
293                                     &work_outbytes);
294         if (tmp == (size_t) -1) {
295             /* Check for zero bytes, which we pass right through. */
296             if (errno == EILSEQ) {
297                 if ((work_inbytes > 0) &&
298                     (work_inbuf_working[0] == '\0') &&
299                     (work_outbytes > 0)) {
300                     work_outbuf_working[0] = '\0';
301                     work_outbuf_working++;
302                     work_inbuf_working++;
303                     work_outbytes--;
304                     work_inbytes--;
305                     ret++;
306                 } else {
307                     /* No go. */
308                     ret = -1;
309                     break;
310                 }
311             } else {
312                 ret = -1;
313                 break;
314             }
315         } else {
316             ret += tmp;
317             break;
318         }
319     } while (work_inbytes > 0);
320 
321     /* We can't handle this particular failure, and it should
322      * never happen.  (If it does, our caller needs fixing.)  */
323     g_assert((ret != (size_t)-1) || (errno != E2BIG));
324 
325     /* Possibly convert the output from UTF-8 to gunichars. */
326     if (converter->out_unichar) {
327         int  left = *outbytes_left;
328         gunichar *g;
329         gchar *p;
330 
331         g = (gunichar*) *outbuf;
332         for(p = (gchar *)work_outbuf_start; p < (gchar *)work_outbuf_working; p = g_utf8_next_char(p)) {
333                g_assert(left>=0);
334                *g++ = g_utf8_get_char(p);
335                left -= sizeof(gunichar);
336         }
337         *outbytes_left = left;
338         *outbuf = (guchar*) g;
339     } else {
340         /* Pass on the output results. */
341         *outbuf = work_outbuf_working;
342         *outbytes_left -= (work_outbuf_working - work_outbuf_start);
343     }
344 
345     /* Advance the input pointer to the right place. */
346     if (converter->in_unichar) {
347         /* Get an idea of how many characters were converted, and
348          * advance the pointer as required. */
349         gsize chars;
350         chars = _vte_conv_utf8_strlen((const gchar *)work_inbuf_start,
351                                         work_inbuf_working - work_inbuf_start);
352         *inbuf += (sizeof(gunichar) * chars);
353         *inbytes_left -= (sizeof(gunichar) * chars);
354     } else {
355         /* Pass on the input results. */
356         *inbuf = work_inbuf_working;
357         *inbytes_left -= (work_inbuf_working - work_inbuf_start);
358     }
359 
360     return ret;
361 }
362 
_vte_conv_cu(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)363 size_t _vte_conv_cu(VteConv converter, const guchar **inbuf, gsize *inbytes_left,
364                     gunichar **outbuf, gsize *outbytes_left)
365 {
366     return _vte_conv(converter, inbuf, inbytes_left, (guchar**)outbuf, outbytes_left);
367 }
368 
_vte_conv_uu(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)369 size_t _vte_conv_uu(VteConv converter, const gunichar **inbuf, gsize *inbytes_left,
370                     gunichar **outbuf, gsize *outbytes_left)
371 {
372     return _vte_conv(converter, (const guchar**)inbuf, inbytes_left, (guchar**)outbuf, outbytes_left);
373 }
374 
_vte_conv_uc(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)375 size_t _vte_conv_uc(VteConv converter, const gunichar **inbuf, gsize *inbytes_left, guchar **outbuf, gsize *outbytes_left)
376 {
377     return _vte_conv(converter, (const guchar**)inbuf, inbytes_left, outbuf, outbytes_left);
378 }
379 
380 #ifdef VTECONV_MAIN
381 
ucs4_strlen(const gunichar * p,gsize max_len)382 static gsize ucs4_strlen(const gunichar *p, gsize max_len)
383 {
384     const gunichar *q = p + max_len;
385     gsize length = 0;
386     while (p < q && *p++ != 0) {
387         length++;
388     }
389     return length;
390 }
clear(gunichar wide[5],gchar narrow[5])391 static void clear(gunichar wide[5], gchar narrow[5])
392 {
393     wide[0] = 'T';
394     wide[1] = 'E';
395     wide[2] = 'S';
396     wide[3] = 'T';
397     wide[4] = '\0';
398     strcpy(narrow, "test");
399 }
400 
mixed_strcmp(const gunichar * wide,const guchar * narrow)401 static int mixed_strcmp(const gunichar *wide, const guchar *narrow)
402 {
403     while (*wide && *narrow) {
404         if (*wide != (gunichar)*narrow) {
405             return -1;
406         }
407         wide++;
408         narrow++;
409     }
410     return 0;
411 }
412 
/* Test _vte_conv_utf8_strlen, especially where it differs from g_utf8_strlen:
 * embedded NUL bytes are counted as characters, and counting stops at the
 * given byte length rather than at a terminator. */
static void test_utf8_strlen (void)
{
    /* Empty input. */
    g_assert_cmpuint(_vte_conv_utf8_strlen("", 0), ==, 0);
    /* NUL bytes, alone and mixed with ASCII, each count as one character. */
    g_assert_cmpuint(_vte_conv_utf8_strlen("\0\0\0\0", 4), ==, 4);
    g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0\0", 4), ==, 4);
    g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0B", 4), ==, 4);
    g_assert_cmpuint(_vte_conv_utf8_strlen("A\0B\0", 4), ==, 4);
    /* Plain ASCII; the second case is longer than the requested length. */
    g_assert_cmpuint(_vte_conv_utf8_strlen("ABCD", 4), ==, 4);
    g_assert_cmpuint(_vte_conv_utf8_strlen("ABCDE", 4), ==, 4);
    /* Two two-byte characters in four bytes. */
    g_assert_cmpuint(_vte_conv_utf8_strlen("\xC2\xA0\xC2\xA0", 4), ==, 2);
}
425 
/* Test _vte_conv_utf8_validate against a table of inputs, checking both the
 * verdict and how far the end pointer was advanced. */
static void test_utf8_validate (void)
{
    static const struct {
        char input[16];     /* bytes to validate (may contain NULs) */
        gsize ilen;         /* number of bytes passed as max_len */
        gsize endlen;       /* expected offset of *end from the input start */
        gboolean validates; /* expected return value */
    } tests[] = {
        /* Nothing but NUL bytes, at various lengths. */
        { "\0\0\0", 0, 0, TRUE },
        { "\0\0\0", 1, 1, TRUE },
        { "\0\0\0", 3, 3, TRUE },

        /* ASCII with single and consecutive embedded NULs. */
        { "ab\0cd\0\0ef", 6, 6, TRUE },
        { "ab\0cd\0\0ef", 7, 7, TRUE },
        { "ab\0cd\0\0ef", 9, 9, TRUE },

        /* Multibyte characters on both sides of a NUL. */
        { "ab\xE2\x94\x80\0\xE2\x94\x80yz", 11, 11, TRUE },

        /* Stray continuation byte: validation stops right before it. */
        { "ab\x80\0cd", 6, 2, FALSE },

        /* Lead byte cut short by a NUL: validation stops right before it. */
        { "ab\xE2\0cd", 6, 2, FALSE },
    };
    guint i;
    const char *end;

    for (i = 0; i < G_N_ELEMENTS (tests); i++) {
        g_assert(_vte_conv_utf8_validate(tests[i].input, tests[i].ilen, &end) == tests[i].validates);
        g_assert_cmpuint((gsize)(end - tests[i].input), ==, tests[i].endlen);
    }
}
456 
/* Test _vte_conv_utf8_get_char_validated.  (gunichar)-2 is the expected
 * result for an incomplete sequence and (gunichar)-1 for an invalid one. */
static void test_utf8_get_char_validated (void)
{
        /* A complete three-byte character (U+2500). */
        static const guchar mbyte_test_u[] = { 0xe2, 0x94, 0x80 };
        /* Three lead bytes in a row: invalid once a second byte is seen. */
        static const guchar mbyte_test_break_u[] = { 0xe2, 0xe2, 0xe2 };
        const char *mbyte_test = (const char *)mbyte_test_u;
        const char *mbyte_test_break = (const char *)mbyte_test_break_u;

        /* Empty input is reported as incomplete; a lone NUL decodes to 0. */
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("", 0), ==, (gunichar)-2);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\0", 1), ==, 0);
        /* Growing prefixes of a valid character. */
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 1), ==, (gunichar)-2);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 2), ==, (gunichar)-2);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 3), ==, 0x2500);
        /* Growing prefixes of an invalid sequence. */
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 1), ==, (gunichar)-2);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 2), ==, (gunichar)-1);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 3), ==, (gunichar)-1);
        /* A NUL inside the buffer turns "incomplete" into "invalid". */
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\x80\0", 2), ==, (gunichar)-1);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\0", 2), ==, (gunichar)-1);
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\0", 3), ==, (gunichar)-1);
        /* A complete character followed by a NUL still decodes. */
        g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\x80\0", 4), ==, 0x2500);
}
478 
/* One conversion test case shared by the narrow/wide test helpers. */
typedef struct {
    gunichar wide[8];   /* text as gunichars */
    gssize widelen;     /* size of wide in bytes; negative: measure up to the first zero */
    gchar narrow[8];    /* text as bytes */
    gssize narrowlen;   /* size of narrow in bytes; negative: use strlen() */
    char target[16];    /* target encoding name */
    char source[16];    /* source encoding name */
} TestData;
487 
test_narrow_narrow(const TestData * tests,gsize n_tests)488 static void test_narrow_narrow (const TestData *tests, gsize n_tests)
489 {
490     VteConv conv;
491     guchar buf[10];
492     const guchar *inbuf;
493     guchar *outbuf;
494     gsize inbytes, outbytes, ret;
495     gsize i;
496 
497     for (i = 0; i < n_tests; i++) {
498         memset(buf, 0, sizeof(buf));
499         inbuf = (const guchar *)tests[i].narrow;
500         inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
501         outbuf = buf;
502         outbytes = sizeof(buf);
503         conv = _vte_conv_open(tests[i].target, tests[i].source);
504         ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
505         g_assert_cmpuint(ret, ==, 0);
506         g_assert_cmpuint(inbytes, ==, 0);
507         g_assert_cmpstr(tests[i].narrow, ==, (char *)buf);
508         _vte_conv_close(conv);
509     }
510 }
511 
test_narrow_to_wide(const TestData * tests,gsize n_tests)512 static void test_narrow_to_wide (const TestData *tests, gsize n_tests)
513 {
514     gunichar widebuf[5];
515     VteConv conv;
516     const guchar *inbuf;
517     guchar *outbuf;
518     gsize inbytes, outbytes, ret;
519     gsize i;
520 
521     for (i = 0; i < n_tests; i++) {
522         memset(widebuf, 0, sizeof(widebuf));
523         inbuf = (const guchar *)tests[i].narrow;
524         inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
525         outbuf = (guchar*) widebuf;
526         outbytes = sizeof(widebuf);
527         conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, tests[i].source);
528         ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
529         g_assert_cmpuint(ret, ==, 0);
530         g_assert_cmpuint(inbytes, ==, 0);
531         g_assert_cmpint(mixed_strcmp(widebuf, inbuf), ==, 0);
532         _vte_conv_close(conv);
533     }
534 }
535 
test_wide_to_narrow(const TestData * tests,gsize n_tests)536 static void test_wide_to_narrow (const TestData *tests, gsize n_tests)
537 {
538     char buf[10];
539     VteConv conv;
540     const guchar *inbuf;
541     guchar *outbuf;
542     gsize inbytes, outbytes, ret;
543     gsize i;
544 
545     for (i = 0; i < n_tests; i++) {
546         memset(buf, 0, sizeof(buf));
547         inbuf = (const guchar *)tests[i].wide;
548         inbytes = tests[i].widelen >= 0 ? tests[i].widelen
549                 : ucs4_strlen(tests[i].wide, sizeof(tests[i].wide)) * sizeof(gunichar);
550         outbuf = (guchar *)buf;
551         outbytes = sizeof(buf);
552         conv = _vte_conv_open(tests[i].target, VTE_CONV_GUNICHAR_TYPE);
553         ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
554         g_assert_cmpuint(ret, ==, 0);
555         g_assert_cmpuint(inbytes, ==, 0);
556         g_assert_cmpint(mixed_strcmp(tests[i].wide, outbuf), ==, 0);
557         _vte_conv_close(conv);
558     }
559 }
560 
test_g_iconv_narrow_narrow(void)561 static void test_g_iconv_narrow_narrow (void)
562 {
563     static const TestData tests[] = {
564         { { 0, }, -1, "test", -1, "UTF-8", "ISO-8859-1" },
565         { { 0, }, -1, "test", -1, "ISO-8859-1", "UTF-8" },
566     };
567 
568     test_narrow_narrow (tests, G_N_ELEMENTS(tests));
569 }
570 
test_g_iconv_narrow_to_wide(void)571 static void test_g_iconv_narrow_to_wide (void)
572 {
573     static const TestData tests[] = {
574         { { 0, }, -1, "test", -1, VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1" },
575     };
576 
577     test_narrow_to_wide (tests, G_N_ELEMENTS(tests));
578 }
579 
test_g_iconv_wide_to_narrow(void)580 static void test_g_iconv_wide_to_narrow (void)
581 {
582     static const TestData tests[] = {
583         { { 'T', 'E', 'S', 'T', 0 }, -1, "", -1, "ISO-8859-1", VTE_CONV_GUNICHAR_TYPE },
584     };
585 
586     test_wide_to_narrow (tests, G_N_ELEMENTS(tests));
587 }
588 
test_utf8_to_utf8(void)589 static void test_utf8_to_utf8 (void)
590 {
591     static const TestData tests[] = {
592         { { 0, }, -1, "test", -1, "UTF-8", "UTF-8" },
593     };
594 
595     test_narrow_narrow (tests, G_N_ELEMENTS (tests));
596 }
597 
test_zero_byte_passthrough(void)598 static void test_zero_byte_passthrough (void)
599 {
600     gunichar wide_test[5];
601     gchar narrow_test[5];
602     VteConv conv;
603     const guchar *inbuf;
604     guchar *outbuf;
605     gsize inbytes, outbytes;
606     int i;
607 
608     /* Test zero-byte pass-through. */
609     clear(wide_test, narrow_test);
610     memset(wide_test, 0, sizeof(wide_test));
611     inbuf = (guchar *)wide_test;
612     inbytes = 3 * sizeof(gunichar);
613     outbuf = (guchar *)narrow_test;
614     outbytes = sizeof(narrow_test);
615     conv = _vte_conv_open("UTF-8", VTE_CONV_GUNICHAR_TYPE);
616     i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
617     g_assert(inbytes == 0);
618     if ((narrow_test[0] != 0) ||
619         (narrow_test[1] != 0) ||
620         (narrow_test[2] != 0)) {
621         g_error("Conversion 6 failed.\n");
622     }
623     _vte_conv_close(conv);
624 
625     /* Test zero-byte pass-through. */
626     clear(wide_test, narrow_test);
627     memset(wide_test, 'A', sizeof(wide_test));
628     memset(narrow_test, 0, sizeof(narrow_test));
629     inbuf = (guchar *)narrow_test;
630     inbytes = 3;
631     outbuf = (guchar *)wide_test;
632     outbytes = sizeof(wide_test);
633     conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "UTF-8");
634     i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
635     g_assert(inbytes == 0);
636     if ((wide_test[0] != 0) ||
637         (wide_test[1] != 0) ||
638         (wide_test[2] != 0)) {
639         g_error("Conversion 7 failed.\n");
640     }
641     _vte_conv_close(conv);
642 
643     /* Test zero-byte pass-through. */
644     clear(wide_test, narrow_test);
645     memset(wide_test, 'A', sizeof(wide_test));
646     memset(narrow_test, 0, sizeof(narrow_test));
647     inbuf = (guchar *)narrow_test;
648     inbytes = 3;
649     outbuf = (guchar *)wide_test;
650     outbytes = sizeof(wide_test);
651     conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1");
652     i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
653     g_assert(inbytes == 0);
654     if ((wide_test[0] != 0) ||
655         (wide_test[1] != 0) ||
656         (wide_test[2] != 0)) {
657         g_error("Conversion 8 failed.\n");
658     }
659     _vte_conv_close(conv);
660 }
661 
test_utf8_to_utf8_error(void)662 static void test_utf8_to_utf8_error (void)
663 {
664     static const guchar mbyte_test[] = { 0xe2, 0x94, 0x80 };
665     static const guchar mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };
666     gchar buf[10];
667     VteConv conv;
668     const guchar *inbuf;
669     guchar *outbuf;
670     gsize inbytes, outbytes;
671     gsize i;
672 
673     /* Test UTF-8 to UTF-8 error reporting, valid multibyte. */
674     for (i = 0; i < sizeof(mbyte_test); i++) {
675         int ret;
676         inbuf = mbyte_test;
677         inbytes = i + 1;
678         outbuf = (guchar *)buf;
679         outbytes = sizeof(buf);
680         conv = _vte_conv_open("UTF-8", "UTF-8");
681         ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
682         switch (i) {
683             case 0:
684                 g_assert_cmpint(ret, ==, -1);
685                 g_assert_cmpint(errno, ==, EINVAL);
686                 break;
687             case 1:
688                 g_assert_cmpint(ret, ==, -1);
689                 g_assert_cmpint(errno, ==, EINVAL);
690                 break;
691             case 2:
692                 g_assert_cmpint(ret, !=, -1);
693                 break;
694             default:
695                 g_assert_not_reached();
696                 break;
697         }
698         _vte_conv_close(conv);
699     }
700 
701     /* Test UTF-8 to UTF-8 error reporting, invalid multibyte. */
702     for (i = 0; i < sizeof(mbyte_test_break); i++) {
703         int ret;
704         inbuf = mbyte_test_break;
705         inbytes = i + 1;
706         outbuf = (guchar *)buf;
707         outbytes = sizeof(buf);
708         conv = _vte_conv_open("UTF-8", "UTF-8");
709         ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
710         _vte_conv_close(conv);
711         switch (i) {
712             case 0:
713                 g_assert_cmpint(ret, ==, -1);
714                 g_assert_cmpint(errno, ==, EINVAL);
715                 break;
716             case 1:
717                 g_assert_cmpint(ret, ==, -1);
718                 g_assert_cmpint(errno, ==, EILSEQ);
719                 break;
720             case 2:
721                 g_assert_cmpint(ret, ==, -1);
722                 g_assert_cmpint(errno, ==, EILSEQ);
723                 break;
724             default:
725                 g_assert_not_reached();
726                 break;
727         }
728     }
729 }
730 
main(int argc,char * argv[])731 int main (int argc, char *argv[])
732 {
733         g_test_init (&argc, &argv, nullptr);
734 
735         g_test_add_func ("/vte/conv/utf8/strlen", test_utf8_strlen);
736         g_test_add_func ("/vte/conv/utf8/validate", test_utf8_validate);
737         g_test_add_func ("/vte/conv/utf8/get-char", test_utf8_get_char_validated);
738         g_test_add_func ("/vte/conv/utf8/conversion", test_utf8_to_utf8);
739         g_test_add_func ("/vte/conv/utf8/conversion-with-error", test_utf8_to_utf8_error);
740         g_test_add_func ("/vte/conv/narrow-narrow", test_g_iconv_narrow_narrow);
741         g_test_add_func ("/vte/conv/narrow-to-wide", test_g_iconv_narrow_to_wide);
742         g_test_add_func ("/vte/conv/wide-to-narrow", test_g_iconv_wide_to_narrow);
743         g_test_add_func ("/vte/conv/zero-byte-passthrough", test_zero_byte_passthrough);
744 
745     return g_test_run ();
746 }
747 #endif
748