1 /*
2  * Copyright (C) 2003 Red Hat, Inc.
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 /* The interfaces in this file are subject to change at any time. */
20 
21 #include "config.h"
22 
23 #include <sys/types.h>
24 #include <errno.h>
25 #include <string.h>
26 #include <glib.h>
27 #include "buffer.h"
28 #include "vteconv.h"
29 
30 #ifdef VTE_COMPILATION
31 #include "vte-private.h"
32 #else
33 #define VTE_UTF8_BPC                    (6) /* Maximum number of bytes used per UTF-8 character */
34 #endif
35 
36 typedef size_t (*convert_func)(GIConv converter,
37 			  const guchar **inbuf,
38 			  gsize *inbytes_left,
39 			  guchar **outbuf,
40 			  gsize *outbytes_left);
41 struct _VteConv {
42 	GIConv conv;
43 	convert_func convert;
44 	gint (*close)(GIConv converter);
45 	gboolean in_unichar, out_unichar;
46 	VteByteArray *in_scratch, *out_scratch;
47 };
48 
49 /* We can't use g_utf8_strlen as that's not nul-safe :( */
50 static gsize
_vte_conv_utf8_strlen(const gchar * p,gssize max)51 _vte_conv_utf8_strlen(const gchar *p, gssize max)
52 {
53 	const gchar *q = p + max;
54         gsize length = 0;
55         while (p < q) {
56 		p = g_utf8_next_char(p);
57                 length++;
58         }
59 	return length;
60 }
61 
62 /* A variant of g_utf8_validate() that allows NUL characters.
63  * Requires that max_len >= 0 && end != NULL. */
64 static gboolean
_vte_conv_utf8_validate(const gchar * str,gssize max_len,const gchar ** end)65 _vte_conv_utf8_validate(const gchar *str,
66                         gssize max_len,
67                         const gchar **end)
68 {
69         gboolean ret;
70         do {
71                 ret = g_utf8_validate(str, max_len, end);
72                 max_len -= *end - str;
73                 str = *end;
74                 /* Hitting a NUL is okay. Clear the error and iterate over them. */
75                 while (max_len > 0 && *str == '\0') {
76                         ret = TRUE;
77                         max_len--;
78                         str++;
79                         *end = str;
80                 }
81         } while (ret && max_len > 0);
82         return ret;
83 }
84 
85 /* A variant of g_utf8_get_char_validated() that allows NUL characters.
86  * Requires that max_len >= 0. */
87 static gunichar
_vte_conv_utf8_get_char_validated(const gchar * p,gssize max_len)88 _vte_conv_utf8_get_char_validated(const gchar *p,
89                                   gssize max_len) {
90         gunichar ret;
91         /* Handle NUL at the beginning. */
92         if (max_len > 0 && p[0] == '\0')
93                 return 0;
94         ret = g_utf8_get_char_validated(p, max_len);
95         /* If a partial match is returned but there's a NUL in the buffer
96          * then this is a wrong error, we're facing an invalid character. */
97         if (ret == (gunichar) -2 && memchr(p, '\0', max_len) != NULL)
98                 ret = (gunichar) -1;
99         return ret;
100 }
101 
102 /* A bogus UTF-8 to UTF-8 conversion function which attempts to provide the
103  * same semantics as g_iconv(). */
104 static size_t
_vte_conv_utf8_utf8(GIConv converter,const gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)105 _vte_conv_utf8_utf8(GIConv converter,
106 		    const gchar **inbuf,
107 		    gsize *inbytes_left,
108 		    gchar **outbuf,
109 		    gsize *outbytes_left)
110 {
111 	gboolean validated;
112 	const gchar *endptr;
113 	size_t bytes;
114 
115 	/* We don't tolerate shenanigans! */
116 	g_assert_cmpuint(*outbytes_left, >=, *inbytes_left);
117 
118 	/* The only error we can throw is EILSEQ, so check for that here. */
119         validated = _vte_conv_utf8_validate(*inbuf, *inbytes_left, &endptr);
120 
121 	/* Copy whatever data was validated. */
122 	bytes = endptr - *inbuf;
123 	memcpy(*outbuf, *inbuf, bytes);
124 	*inbuf += bytes;
125 	*outbuf += bytes;
126 	*outbytes_left -= bytes;
127 	*inbytes_left -= bytes;
128 
129 	/* Return 0 (number of non-reversible conversions performed) if everything
130 	 * looked good, else EILSEQ. */
131 	if (validated) {
132 		return 0;
133 	}
134 
135 	/* Determine why the end of the string is not valid. */
136         if (_vte_conv_utf8_get_char_validated(*inbuf, *inbytes_left) == (gunichar) -2) {
137 		/* Prefix of a valid UTF-8 */
138 		errno = EINVAL;
139 	} else {
140 		/* We had enough bytes to validate the character, and
141 		 * it failed.  It just doesn't look right. */
142 		errno = EILSEQ;
143 	}
144 	return (size_t) -1;
145 }
146 
147 /* Open a conversion descriptor which, in addition to normal cases, provides
148  * UTF-8 to UTF-8 conversions and a gunichar-compatible source and target
149  * encoding. */
150 VteConv
_vte_conv_open(const char * target,const char * source)151 _vte_conv_open(const char *target, const char *source)
152 {
153 	VteConv ret;
154 	GIConv conv;
155 	gboolean in_unichar, out_unichar, utf8;
156 	const char *real_target, *real_source;
157 
158 	/* No shenanigans. */
159 	g_assert(target != NULL);
160 	g_assert(source != NULL);
161 	g_assert(strlen(target) > 0);
162 	g_assert(strlen(source) > 0);
163 
164 	/* Assume normal iconv usage. */
165 	in_unichar = FALSE;
166 	out_unichar = FALSE;
167 	real_source = source;
168 	real_target = target;
169 
170 	/* Determine if we need to convert gunichars to UTF-8 on input. */
171 	if (strcmp(target, VTE_CONV_GUNICHAR_TYPE) == 0) {
172 		real_target = "UTF-8";
173 		out_unichar = TRUE;
174 	}
175 
176 	/* Determine if we need to convert UTF-8 to gunichars on output. */
177 	if (strcmp(source, VTE_CONV_GUNICHAR_TYPE) == 0) {
178 		real_source = "UTF-8";
179 		in_unichar = TRUE;
180 	}
181 
182 	/* Determine if this is a UTF-8 to UTF-8 conversion. */
183 	utf8 = ((g_ascii_strcasecmp(real_target, "UTF-8") == 0) &&
184 		(g_ascii_strcasecmp(real_source, "UTF-8") == 0));
185 
186 	/* If we're doing UTF-8 to UTF-8, just use a dummy function which
187 	 * checks for bad data. */
188 	conv = NULL;
189 	if (!utf8) {
190 		char *translit_target = g_strdup_printf ("%s//translit", real_target);
191 		conv = g_iconv_open(translit_target, real_source);
192 		g_free (translit_target);
193 		if (conv == ((GIConv) -1)) {
194 			conv = g_iconv_open(real_target, real_source);
195 		}
196 		if (conv == ((GIConv) -1)) {
197 			return VTE_INVALID_CONV;
198 		}
199 	}
200 
201 	/* Set up the descriptor. */
202 	ret = g_slice_new0(struct _VteConv);
203 	if (utf8) {
204 		ret->conv = NULL;
205 		ret->convert = (convert_func) _vte_conv_utf8_utf8;
206 		ret->close = NULL;
207 	} else {
208 		g_assert((conv != NULL) && (conv != ((GIConv) -1)));
209 		ret->conv = conv;
210 		ret->convert = (convert_func) g_iconv;
211 		ret->close = g_iconv_close;
212 	}
213 
214 	/* Initialize other elements. */
215 	ret->in_unichar = in_unichar;
216 	ret->out_unichar = out_unichar;
217 
218 	/* Create scratch buffers. */
219 	ret->in_scratch = _vte_byte_array_new();
220 	ret->out_scratch = _vte_byte_array_new();
221 
222 	return ret;
223 }
224 
225 gint
_vte_conv_close(VteConv converter)226 _vte_conv_close(VteConv converter)
227 {
228 	g_assert(converter != NULL);
229 	g_assert(converter != VTE_INVALID_CONV);
230 
231 	/* Close the underlying descriptor, if there is one. */
232 	if (converter->conv != NULL) {
233 		g_assert(converter->close != NULL);
234 		converter->close(converter->conv);
235 	}
236 
237 	/* Free the scratch buffers. */
238 	_vte_byte_array_free(converter->in_scratch);
239 	_vte_byte_array_free(converter->out_scratch);
240 
241 	/* Free the structure itself. */
242 	g_slice_free(struct _VteConv, converter);
243 
244 	return 0;
245 }
246 
247 size_t
_vte_conv(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)248 _vte_conv(VteConv converter,
249 	  const guchar **inbuf, gsize *inbytes_left,
250 	  guchar **outbuf, gsize *outbytes_left)
251 {
252 	size_t ret, tmp;
253 	const guchar *work_inbuf_start, *work_inbuf_working;
254 	guchar *work_outbuf_start, *work_outbuf_working;
255 	gsize work_inbytes, work_outbytes;
256 
257 	g_assert(converter != NULL);
258 	g_assert(converter != VTE_INVALID_CONV);
259 
260 	work_inbuf_start = work_inbuf_working = *inbuf;
261 	work_outbuf_start = work_outbuf_working = *outbuf;
262 	work_inbytes = *inbytes_left;
263 	work_outbytes = *outbytes_left;
264 
265 	/* Possibly convert the input data from gunichars to UTF-8. */
266 	if (converter->in_unichar) {
267 		int i, char_count;
268 		guchar *p, *end;
269 		gunichar *g;
270 		/* Make sure the scratch buffer has enough space. */
271 		char_count = *inbytes_left / sizeof(gunichar);
272 		_vte_byte_array_set_minimum_size(converter->in_scratch,
273 					     (char_count + 1) * VTE_UTF8_BPC);
274 		/* Convert the incoming text. */
275 		g = (gunichar*) *inbuf;
276 		p = converter->in_scratch->data;
277 		end = p + (char_count + 1) * VTE_UTF8_BPC;
278 		for (i = 0; i < char_count; i++) {
279 			p += g_unichar_to_utf8(g[i], (gchar *)p);
280 			g_assert(p <= end);
281 		}
282 		/* Update our working pointers. */
283 		work_inbuf_start = converter->in_scratch->data;
284 		work_inbuf_working = work_inbuf_start;
285 		work_inbytes = p - work_inbuf_start;
286 	}
287 
288 	/* Possibly set the output pointers to point at our scratch buffer. */
289 	if (converter->out_unichar) {
290 		work_outbytes = *outbytes_left * VTE_UTF8_BPC;
291 		_vte_byte_array_set_minimum_size(converter->out_scratch,
292 					     work_outbytes);
293 		work_outbuf_start = converter->out_scratch->data;
294 		work_outbuf_working = work_outbuf_start;
295 	}
296 
297 	/* Call the underlying conversion. */
298 	ret = 0;
299 	do {
300 		tmp = converter->convert(converter->conv,
301 					 &work_inbuf_working,
302 					 &work_inbytes,
303 					 &work_outbuf_working,
304 					 &work_outbytes);
305 		if (tmp == (size_t) -1) {
306 			/* Check for zero bytes, which we pass right through. */
307 			if (errno == EILSEQ) {
308 				if ((work_inbytes > 0) &&
309 				    (work_inbuf_working[0] == '\0') &&
310 				    (work_outbytes > 0)) {
311 					work_outbuf_working[0] = '\0';
312 					work_outbuf_working++;
313 					work_inbuf_working++;
314 					work_outbytes--;
315 					work_inbytes--;
316 					ret++;
317 				} else {
318 					/* No go. */
319 					ret = -1;
320 					break;
321 				}
322 			} else {
323 				ret = -1;
324 				break;
325 			}
326 		} else {
327 			ret += tmp;
328 			break;
329 		}
330 	} while (work_inbytes > 0);
331 
332 	/* We can't handle this particular failure, and it should
333 	 * never happen.  (If it does, our caller needs fixing.)  */
334 	g_assert((ret != (size_t)-1) || (errno != E2BIG));
335 
336 	/* Possibly convert the output from UTF-8 to gunichars. */
337 	if (converter->out_unichar) {
338 		int  left = *outbytes_left;
339 		gunichar *g;
340 		gchar *p;
341 
342 		g = (gunichar*) *outbuf;
343 		for(p = (gchar *)work_outbuf_start;
344 				p < (gchar *)work_outbuf_working;
345 				p = g_utf8_next_char(p)) {
346 		       g_assert(left>=0);
347 		       *g++ = g_utf8_get_char(p);
348 		       left -= sizeof(gunichar);
349 		}
350 		*outbytes_left = left;
351 		*outbuf = (guchar*) g;
352 	} else {
353 		/* Pass on the output results. */
354 		*outbuf = work_outbuf_working;
355 		*outbytes_left -= (work_outbuf_working - work_outbuf_start);
356 	}
357 
358 	/* Advance the input pointer to the right place. */
359 	if (converter->in_unichar) {
360 		/* Get an idea of how many characters were converted, and
361 		 * advance the pointer as required. */
362 		gsize chars;
363 		chars = _vte_conv_utf8_strlen((const gchar *)work_inbuf_start,
364 					      work_inbuf_working - work_inbuf_start);
365 		*inbuf += (sizeof(gunichar) * chars);
366 		*inbytes_left -= (sizeof(gunichar) * chars);
367 	} else {
368 		/* Pass on the input results. */
369 		*inbuf = work_inbuf_working;
370 		*inbytes_left -= (work_inbuf_working - work_inbuf_start);
371 	}
372 
373 	return ret;
374 }
375 
376 size_t
_vte_conv_cu(VteConv converter,const guchar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)377 _vte_conv_cu(VteConv converter,
378 	     const guchar **inbuf, gsize *inbytes_left,
379 	     gunichar **outbuf, gsize *outbytes_left)
380 {
381 	return _vte_conv(converter,
382 			 inbuf, inbytes_left,
383 			 (guchar**)outbuf, outbytes_left);
384 }
385 
386 size_t
_vte_conv_uu(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,gunichar ** outbuf,gsize * outbytes_left)387 _vte_conv_uu(VteConv converter,
388 	     const gunichar **inbuf, gsize *inbytes_left,
389 	     gunichar **outbuf, gsize *outbytes_left)
390 {
391 	return _vte_conv(converter,
392 			 (const guchar**)inbuf, inbytes_left,
393 			 (guchar**)outbuf, outbytes_left);
394 }
395 
396 size_t
_vte_conv_uc(VteConv converter,const gunichar ** inbuf,gsize * inbytes_left,guchar ** outbuf,gsize * outbytes_left)397 _vte_conv_uc(VteConv converter,
398 	     const gunichar **inbuf, gsize *inbytes_left,
399 	     guchar **outbuf, gsize *outbytes_left)
400 {
401 	return _vte_conv(converter,
402 			 (const guchar**)inbuf, inbytes_left,
403 			 outbuf, outbytes_left);
404 }
405 
406 #ifdef VTECONV_MAIN
407 
408 static gsize
ucs4_strlen(gunichar * p,gsize max_len)409 ucs4_strlen(gunichar *p,
410             gsize max_len)
411 {
412         gunichar *q = p + max_len;
413         gsize length = 0;
414         while (p < q && *p++ != 0)
415                 length++;
416 	return length;
417 }
418 static void
clear(gunichar wide[5],gchar narrow[5])419 clear(gunichar wide[5], gchar narrow[5])
420 {
421 	wide[0] = 'T';
422 	wide[1] = 'E';
423 	wide[2] = 'S';
424 	wide[3] = 'T';
425 	wide[4] = '\0';
426 	strcpy(narrow, "test");
427 }
428 
429 static int
mixed_strcmp(gunichar * wide,gchar * narrow)430 mixed_strcmp(gunichar *wide, gchar *narrow)
431 {
432 	while (*wide && *narrow) {
433 		if (*wide != *narrow) {
434 			return -1;
435 		}
436 		wide++;
437 		narrow++;
438 	}
439 	return 0;
440 }
441 
442 /* Test _vte_conv_utf8_strlen, especially where it differs from g_utf8_strlen. */
443 static void
test_utf8_strlen(void)444 test_utf8_strlen (void)
445 {
446         g_assert_cmpuint(_vte_conv_utf8_strlen("", 0), ==, 0);
447 	g_assert_cmpuint(_vte_conv_utf8_strlen("\0\0\0\0", 4), ==, 4);
448 	g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0\0", 4), ==, 4);
449 	g_assert_cmpuint(_vte_conv_utf8_strlen("\0A\0B", 4), ==, 4);
450 	g_assert_cmpuint(_vte_conv_utf8_strlen("A\0B\0", 4), ==, 4);
451         g_assert_cmpuint(_vte_conv_utf8_strlen("ABCD", 4), ==, 4);
452 	g_assert_cmpuint(_vte_conv_utf8_strlen("ABCDE", 4), ==, 4);
453         g_assert_cmpuint(_vte_conv_utf8_strlen("\xC2\xA0\xC2\xA0", 4), ==, 2);
454 }
455 
456 static void
test_utf8_validate(void)457 test_utf8_validate (void)
458 {
459         static const struct {
460                 char input[16];
461                 gsize ilen;
462                 gsize endlen;
463                 gboolean validates;
464         } tests[] = {
465                 { "\0\0\0", 0, 0, TRUE },
466                 { "\0\0\0", 1, 1, TRUE },
467                 { "\0\0\0", 3, 3, TRUE },
468 
469                 { "ab\0cd\0\0ef", 6, 6, TRUE },
470                 { "ab\0cd\0\0ef", 7, 7, TRUE },
471                 { "ab\0cd\0\0ef", 9, 9, TRUE },
472 
473                 { "ab\xE2\x94\x80\0\xE2\x94\x80yz", 11, 11, TRUE },
474 
475                 { "ab\x80\0cd", 6, 2, FALSE },
476 
477                 { "ab\xE2\0cd", 6, 2, FALSE },
478         };
479         guint i;
480         const char *end;
481 
482         for (i = 0; i < G_N_ELEMENTS (tests); i++) {
483                 g_assert(_vte_conv_utf8_validate(tests[i].input, tests[i].ilen, &end) == tests[i].validates);
484                 g_assert_cmpuint((gsize)(end - tests[i].input), ==, tests[i].endlen);
485         }
486 }
487 
488 /* Test _vte_conv_utf8_get_char_validated. */
489 static void
test_utf8_get_char_validated(void)490 test_utf8_get_char_validated (void)
491 {
492 	static const char mbyte_test[] = { 0xe2, 0x94, 0x80 };
493 	static const char mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };
494 
495         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("", 0), ==, (gunichar)-2);
496         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\0", 1), ==, 0);
497         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 1), ==, (gunichar)-2);
498         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 2), ==, (gunichar)-2);
499         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test, 3), ==, 0x2500);
500         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 1), ==, (gunichar)-2);
501         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 2), ==, (gunichar)-1);
502         g_assert_cmpuint(_vte_conv_utf8_get_char_validated(mbyte_test_break, 3), ==, (gunichar)-1);
503         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\x80\0", 2), ==, (gunichar)-1);
504         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\0", 2), ==, (gunichar)-1);
505         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\0", 3), ==, (gunichar)-1);
506         g_assert_cmpuint(_vte_conv_utf8_get_char_validated("\xE2\x94\x80\0", 4), ==, 0x2500);
507 }
508 
509 typedef struct {
510         gunichar wide[8];
511         gssize widelen;
512         gchar narrow[8];
513         gssize narrowlen;
514         char target[16];
515         char source[16];
516 } TestData;
517 
518 static void
test_narrow_narrow(TestData * tests,gsize n_tests)519 test_narrow_narrow (TestData *tests,
520                     gsize n_tests)
521 {
522 	VteConv conv;
523 	guchar buf[10];
524 	const guchar *inbuf;
525 	guchar *outbuf;
526 	gsize inbytes, outbytes, ret;
527         gsize i;
528 
529         for (i = 0; i < n_tests; i++) {
530                 memset(buf, 0, sizeof(buf));
531                 inbuf = tests[i].narrow;
532                 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
533                 outbuf = buf;
534                 outbytes = sizeof(buf);
535                 conv = _vte_conv_open(tests[i].target, tests[i].source);
536                 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
537                 g_assert_cmpuint(ret, ==, 0);
538                 g_assert_cmpuint(inbytes, ==, 0);
539                 g_assert_cmpstr(tests[i].narrow, ==, buf);
540                 _vte_conv_close(conv);
541         }
542 }
543 
544 static void
test_narrow_to_wide(TestData * tests,gsize n_tests)545 test_narrow_to_wide (TestData *tests,
546                      gsize n_tests)
547 {
548         gunichar widebuf[5];
549 	VteConv conv;
550 	const guchar *inbuf;
551 	guchar *outbuf;
552 	gsize inbytes, outbytes, ret;
553         gsize i;
554 
555         for (i = 0; i < n_tests; i++) {
556                 memset(widebuf, 0, sizeof(widebuf));
557                 inbuf = tests[i].narrow;
558                 inbytes = tests[i].narrowlen >= 0 ? tests[i].narrowlen : strlen(tests[i].narrow);
559                 outbuf = (gchar*) widebuf;
560                 outbytes = sizeof(widebuf);
561                 conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, tests[i].source);
562                 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
563                 g_assert_cmpuint(ret, ==, 0);
564                 g_assert_cmpuint(inbytes, ==, 0);
565                 g_assert_cmpint(mixed_strcmp(widebuf, tests[i].narrow), ==, 0);
566                 _vte_conv_close(conv);
567         }
568 }
569 
570 static void
test_wide_to_narrow(TestData * tests,gsize n_tests)571 test_wide_to_narrow (TestData *tests,
572                      gsize n_tests)
573 {
574         char buf[10];
575 	VteConv conv;
576 	const guchar *inbuf;
577 	guchar *outbuf;
578 	gsize inbytes, outbytes, ret;
579         gsize i;
580 
581         for (i = 0; i < n_tests; i++) {
582                 memset(buf, 0, sizeof(buf));
583                 inbuf = (char*)tests[i].wide;
584                 inbytes = tests[i].widelen >= 0 ? tests[i].widelen
585                         : ucs4_strlen(tests[i].wide, sizeof(tests[i].wide)) * sizeof(gunichar);
586                 outbuf = buf;
587                 outbytes = sizeof(buf);
588                 conv = _vte_conv_open(tests[i].target, VTE_CONV_GUNICHAR_TYPE);
589                 ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
590                 g_assert_cmpuint(ret, ==, 0);
591                 g_assert_cmpuint(inbytes, ==, 0);
592                 g_assert_cmpint(mixed_strcmp(tests[i].wide, buf), ==, 0);
593                 _vte_conv_close(conv);
594         }
595 }
596 
597 static void
test_g_iconv_narrow_narrow(void)598 test_g_iconv_narrow_narrow (void)
599 {
600         static const TestData tests[] = {
601                 { { 0, }, -1, "test", -1, "UTF-8", "ISO-8859-1" },
602                 { { 0, }, -1, "test", -1, "ISO-8859-1", "UTF-8" },
603         };
604 
605         test_narrow_narrow (tests, G_N_ELEMENTS(tests));
606 }
607 
608 static void
test_g_iconv_narrow_to_wide(void)609 test_g_iconv_narrow_to_wide (void)
610 {
611         static const TestData tests[] = {
612                 { { 0, }, -1, "test", -1, VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1" },
613         };
614 
615         test_narrow_to_wide (tests, G_N_ELEMENTS(tests));
616 }
617 
618 static void
test_g_iconv_wide_to_narrow(void)619 test_g_iconv_wide_to_narrow (void)
620 {
621         static const TestData tests[] = {
622                 { { 'T', 'E', 'S', 'T', 0 }, -1, "", -1, "ISO-8859-1", VTE_CONV_GUNICHAR_TYPE },
623         };
624 
625         test_wide_to_narrow (tests, G_N_ELEMENTS(tests));
626 }
627 
628 static void
test_utf8_to_utf8(void)629 test_utf8_to_utf8 (void)
630 {
631         static const TestData tests[] = {
632                 { { 0, }, -1, "test", -1, "UTF-8", "UTF-8" },
633         };
634 
635         test_narrow_narrow (tests, G_N_ELEMENTS (tests));
636 }
637 
638 static void
test_zero_byte_passthrough(void)639 test_zero_byte_passthrough (void)
640 {
641 	gunichar wide_test[5];
642 	gchar narrow_test[5];
643 	VteConv conv;
644 	const guchar *inbuf;
645 	guchar *outbuf;
646 	gsize inbytes, outbytes;
647         int i;
648 
649 	/* Test zero-byte pass-through. */
650 	clear(wide_test, narrow_test);
651 	memset(wide_test, 0, sizeof(wide_test));
652 	inbuf = (gchar*) wide_test;
653 	inbytes = 3 * sizeof(gunichar);
654 	outbuf = narrow_test;
655 	outbytes = sizeof(narrow_test);
656 	conv = _vte_conv_open("UTF-8", VTE_CONV_GUNICHAR_TYPE);
657 	i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
658 	g_assert(inbytes == 0);
659 	if ((narrow_test[0] != 0) ||
660 	    (narrow_test[1] != 0) ||
661 	    (narrow_test[2] != 0)) {
662 		g_error("Conversion 6 failed.\n");
663 	}
664 	_vte_conv_close(conv);
665 
666 	/* Test zero-byte pass-through. */
667 	clear(wide_test, narrow_test);
668 	memset(wide_test, 'A', sizeof(wide_test));
669 	memset(narrow_test, 0, sizeof(narrow_test));
670 	inbuf = narrow_test;
671 	inbytes = 3;
672 	outbuf = (char*)wide_test;
673 	outbytes = sizeof(wide_test);
674 	conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "UTF-8");
675 	i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
676 	g_assert(inbytes == 0);
677 	if ((wide_test[0] != 0) ||
678 	    (wide_test[1] != 0) ||
679 	    (wide_test[2] != 0)) {
680 		g_error("Conversion 7 failed.\n");
681 	}
682 	_vte_conv_close(conv);
683 
684 	/* Test zero-byte pass-through. */
685 	clear(wide_test, narrow_test);
686 	memset(wide_test, 'A', sizeof(wide_test));
687 	memset(narrow_test, 0, sizeof(narrow_test));
688 	inbuf = narrow_test;
689 	inbytes = 3;
690 	outbuf = (char*)wide_test;
691 	outbytes = sizeof(wide_test);
692 	conv = _vte_conv_open(VTE_CONV_GUNICHAR_TYPE, "ISO-8859-1");
693 	i = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
694 	g_assert(inbytes == 0);
695 	if ((wide_test[0] != 0) ||
696 	    (wide_test[1] != 0) ||
697 	    (wide_test[2] != 0)) {
698 		g_error("Conversion 8 failed.\n");
699 	}
700 	_vte_conv_close(conv);
701 }
702 
703 static void
test_utf8_to_utf8_error(void)704 test_utf8_to_utf8_error (void)
705 {
706 	gchar buf[10];
707 	VteConv conv;
708 	const guchar *inbuf;
709 	guchar *outbuf;
710 	gsize inbytes, outbytes;
711 	static const char mbyte_test[] = { 0xe2, 0x94, 0x80 };
712 	static const char mbyte_test_break[] = { 0xe2, 0xe2, 0xe2 };
713 	gsize i;
714 
715 	/* Test UTF-8 to UTF-8 error reporting, valid multibyte. */
716 	for (i = 0; i < sizeof(mbyte_test); i++) {
717 		int ret;
718 		inbuf = mbyte_test;
719 		inbytes = i + 1;
720 		outbuf = buf;
721 		outbytes = sizeof(buf);
722 		conv = _vte_conv_open("UTF-8", "UTF-8");
723 		ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
724 		switch (i) {
725 		case 0:
726                         g_assert_cmpint(ret, ==, -1);
727                         g_assert_cmpint(errno, ==, EINVAL);
728 			break;
729 		case 1:
730                         g_assert_cmpint(ret, ==, -1);
731                         g_assert_cmpint(errno, ==, EINVAL);
732 			break;
733 		case 2:
734                         g_assert_cmpint(ret, !=, -1);
735 			break;
736 		default:
737 			g_assert_not_reached();
738 			break;
739 		}
740 		_vte_conv_close(conv);
741 	}
742 
743 	/* Test UTF-8 to UTF-8 error reporting, invalid multibyte. */
744 	for (i = 0; i < sizeof(mbyte_test_break); i++) {
745 		int ret;
746 		inbuf = mbyte_test_break;
747 		inbytes = i + 1;
748 		outbuf = buf;
749 		outbytes = sizeof(buf);
750 		conv = _vte_conv_open("UTF-8", "UTF-8");
751 		ret = _vte_conv(conv, &inbuf, &inbytes, &outbuf, &outbytes);
752 		_vte_conv_close(conv);
753 		switch (i) {
754 		case 0:
755                         g_assert_cmpint(ret, ==, -1);
756                         g_assert_cmpint(errno, ==, EINVAL);
757 			break;
758 		case 1:
759                         g_assert_cmpint(ret, ==, -1);
760                         g_assert_cmpint(errno, ==, EILSEQ);
761 			break;
762 		case 2:
763                         g_assert_cmpint(ret, ==, -1);
764                         g_assert_cmpint(errno, ==, EILSEQ);
765 			break;
766 		default:
767 			g_assert_not_reached();
768 			break;
769 		}
770 	}
771 }
772 
773 int
main(int argc,char * argv[])774 main (int argc,
775       char *argv[])
776 {
777         g_test_init (&argc, &argv, NULL);
778 
779         g_test_add_func ("/vte/conv/utf8/strlen", test_utf8_strlen);
780         g_test_add_func ("/vte/conv/utf8/validate", test_utf8_validate);
781         g_test_add_func ("/vte/conv/utf8/get-char", test_utf8_get_char_validated);
782         g_test_add_func ("/vte/conv/utf8/conversion", test_utf8_to_utf8);
783         g_test_add_func ("/vte/conv/utf8/conversion-with-error", test_utf8_to_utf8_error);
784         g_test_add_func ("/vte/conv/narrow-narrow", test_g_iconv_narrow_narrow);
785         g_test_add_func ("/vte/conv/narrow-to-wide", test_g_iconv_narrow_to_wide);
786         g_test_add_func ("/vte/conv/wide-to-narrow", test_g_iconv_wide_to_narrow);
787         g_test_add_func ("/vte/conv/zero-byte-passthrough", test_zero_byte_passthrough);
788 
789 	return g_test_run ();
790 }
791 #endif
792