1 /* $XTermId: ptydata.c,v 1.150 2020/10/12 18:46:28 tom Exp $ */
2 
3 /*
4  * Copyright 1999-2019,2020 by Thomas E. Dickey
5  *
6  *                         All Rights Reserved
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sublicense, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  * Except as contained in this notice, the name(s) of the above copyright
28  * holders shall not be used in advertising or otherwise to promote the
29  * sale, use or other dealings in this Software without prior written
30  * authorization.
31  */
32 
#include <data.h>

#include <limits.h>

#if OPT_WIDE_CHARS
#include <menu.h>
#include <wcwidth.h>
#endif
39 
#ifdef TEST_DRIVER
/*
 * Standalone test-driver build:  send TRACE() output to stdout via printf,
 * keep TRACE2() calls compiled but never executed, and replace visibleChars()
 * with a fixed placeholder string.
 */
#undef TRACE
#define TRACE(p) if (1) printf p
#undef TRACE2
#define TRACE2(p) if (0) printf p
#define visibleChars(buf, len) "buffer"
#endif
47 
/*
 * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX
 * systems are broken and return EWOULDBLOCK when they should return EAGAIN.
 * Note that this macro may evaluate its argument more than once.
 *
 * E_TEST(err) is true when 'err' is one of those "try again" codes.  Only the
 * codes which the system headers actually define are compared.
 */
#if defined(EAGAIN) && defined(EWOULDBLOCK)
#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
#else
#ifdef EAGAIN
#define E_TEST(err) ((err) == EAGAIN)
#else
#define E_TEST(err) ((err) == EWOULDBLOCK)
#endif
#endif
62 
63 #if OPT_WIDE_CHARS
/*
 * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
 *
 * At most one character is decoded per call, scanning the bytes from
 * data->next up to (but not including) data->last.  The result is left in
 * data->utf_data, and the number of input bytes which it accounts for is left
 * in data->utf_size.  Malformed input produces UCS_REPL.  The data->next
 * pointer is not advanced here.
 *
 * Returns true if data->utf_size is nonzero.  If the buffer is empty, or if
 * it ends in the middle of a multibyte sequence, data->utf_size is left at
 * zero and the result is false.
 */
Bool
decodeUtf8(TScreen *screen, PtyData *data)
{
    int i;
    int length = (int) (data->last - data->next);
    int utf_count = 0;		/* continuation bytes still expected */
    unsigned utf_char = 0;	/* value accumulated so far */

    data->utf_size = 0;
    for (i = 0; i < length; i++) {
	unsigned c = data->next[i];

	/* Combine UTF-8 into Unicode */
	if (c < 0x80) {
	    /* We received an ASCII character */
	    if (utf_count > 0) {
		data->utf_data = UCS_REPL;	/* prev. sequence incomplete */
		data->utf_size = i;	/* the ASCII byte is not consumed */
	    } else {
		data->utf_data = (IChar) c;
		data->utf_size = 1;
	    }
	    break;
	} else if (screen->vt100_graphics
		   && (c < 0x100)
		   && (utf_count == 0)
		   && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
	    /*
	     * Not in the middle of a sequence, and the character set indexed
	     * by screen->curgr is not nrc_ASCII:  pass the byte through
	     * without UTF-8 decoding.
	     */
	    data->utf_data = (IChar) c;
	    data->utf_size = 1;
	    break;
	} else if (c < 0xc0) {
	    /* We received a continuation byte */
	    if (utf_count < 1) {
		/*
		 * We received a continuation byte before receiving a sequence
		 * state.  Or an attempt to use a C1 control string.  Either
		 * way, it is mapped to the replacement character, unless
		 * allowed by optional feature.
		 */
		data->utf_data = (IChar) (screen->c1_printable ? c : UCS_REPL);
		data->utf_size = (i + 1);
		break;
	    } else if (screen->utf8_weblike
		       && (utf_count == 3
			   && utf_char == 0x04
			   && c >= 0x90)) {
		/* The encoding would form a code point beyond U+10FFFF. */
		data->utf_size = i;
		data->utf_data = UCS_REPL;
		break;
	    } else if (screen->utf8_weblike
		       && (utf_count == 2
			   && utf_char == 0x0d
			   && c >= 0xa0)) {
		/* The encoding would form a surrogate code point. */
		data->utf_size = i;
		data->utf_data = UCS_REPL;
		break;
	    } else {
		/* Check for overlong UTF-8 sequences for which a shorter
		 * encoding would exist and replace them with UCS_REPL.
		 * An overlong UTF-8 sequence can have any of the following
		 * forms:
		 *   1100000x 10xxxxxx
		 *   11100000 100xxxxx 10xxxxxx
		 *   11110000 1000xxxx 10xxxxxx 10xxxxxx
		 *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
		 *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		 */
		if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
		    if (screen->utf8_weblike) {
			/* overlong sequence continued */
			data->utf_data = UCS_REPL;
			data->utf_size = i;
			break;
		    } else {
			/* keep consuming the sequence, but poison its value */
			utf_char = UCS_REPL;
		    }
		}
		/* append the six data bits of this continuation byte */
		utf_char <<= 6;
		utf_char |= (c & 0x3f);
		/* surrogates, U+FFFE and HIDDEN_CHAR are not accepted */
		if ((utf_char >= 0xd800 &&
		     utf_char <= 0xdfff) ||
		    (utf_char == 0xfffe) ||
		    (utf_char == HIDDEN_CHAR)) {
		    utf_char = UCS_REPL;
		}
		utf_count--;
		if (utf_count == 0) {
		    /* that was the last byte of the sequence */
#if !OPT_WIDER_ICHAR
		    /* characters outside UCS-2 become UCS_REPL */
		    if (utf_char > NARROW_ICHAR) {
			TRACE(("using replacement for %#x\n", utf_char));
			utf_char = UCS_REPL;
		    }
#endif
		    data->utf_data = (IChar) utf_char;
		    data->utf_size = (i + 1);
		    break;
		}
	    }
	} else {
	    /* We received a sequence start byte */
	    if (utf_count > 0) {
		/* previous sequence is incomplete */
		data->utf_data = UCS_REPL;
		data->utf_size = i;	/* the start byte is not consumed */
		break;
	    }
	    if (screen->utf8_weblike) {
		/* in this mode, only 2- to 4-byte sequences can be started */
		if (c < 0xe0) {
		    if (!(c & 0x1e)) {
			/* overlong sequence start */
			data->utf_data = UCS_REPL;
			data->utf_size = (i + 1);
			break;
		    }
		    utf_count = 1;
		    utf_char = (c & 0x1f);
		} else if (c < 0xf0) {
		    utf_count = 2;
		    utf_char = (c & 0x0f);
		} else if (c < 0xf5) {
		    utf_count = 3;
		    utf_char = (c & 0x07);
		} else {
		    /* 0xf5 and above cannot start a sequence here */
		    data->utf_data = UCS_REPL;
		    data->utf_size = (i + 1);
		    break;
		}
	    } else {
		/* otherwise the 5- and 6-byte forms are also recognized */
		if (c < 0xe0) {
		    utf_count = 1;
		    utf_char = (c & 0x1f);
		    if (!(c & 0x1e)) {
			/* overlong sequence */
			utf_char = UCS_REPL;
		    }
		} else if (c < 0xf0) {
		    utf_count = 2;
		    utf_char = (c & 0x0f);
		} else if (c < 0xf8) {
		    utf_count = 3;
		    utf_char = (c & 0x07);
		} else if (c < 0xfc) {
		    utf_count = 4;
		    utf_char = (c & 0x03);
		} else if (c < 0xfe) {
		    utf_count = 5;
		    utf_char = (c & 0x01);
		} else {
		    /* 0xfe and 0xff never start a sequence */
		    data->utf_data = UCS_REPL;
		    data->utf_size = (i + 1);
		    break;
		}
	    }
	}
    }
#if OPT_TRACE > 1
    TRACE(("UTF-8 char %04X [%d..%d]\n",
	   data->utf_data,
	   (int) (data->next - data->buffer),
	   (int) (data->next - data->buffer + data->utf_size - 1)));
#endif

    return (data->utf_size != 0);
}
235 #endif
236 
/*
 * Read pending data from the pty into the buffer described by 'data'.
 *
 * On non-VMS builds, nothing is read unless screen->respond is set in
 * 'select_mask'.  Before reading, trimPtyData() moves the unread bytes to the
 * start of the buffer; up to FRG_SIZE bytes are then read at data->last.
 *
 * Returns the number of bytes read, after advancing data->last by that
 * amount, or zero if nothing was read.  An EIO error is handed to
 * NormalExit(); errors other than EAGAIN/EWOULDBLOCK are handed to Panic().
 */
int
readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
{
    TScreen *screen = TScreenOf(xw);
    int size = 0;

#ifdef VMS
    if (*select_mask & pty_mask) {
	trimPtyData(xw, data);
	if (read_queue.flink != 0) {
	    size = tt_read(data->next);
	    if (size == 0) {
		Panic("input: read returned zero\n", 0);
	    }
	} else {
	    sys$hiber();
	}
    }
#else /* !VMS */
    if (FD_ISSET(screen->respond, select_mask)) {
	int save_err;
	trimPtyData(xw, data);

	size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
	/* capture errno before any other call can overwrite it */
	save_err = errno;
#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
	/*
	 * Yes, I know this is a majorly f*ugly hack, however it seems to
	 * be necessary for Solaris x86.  DWH 11/15/94
	 * Dunno why though..
	 * (and now CYGWIN, alanh@xfree86.org 08/15/01
	 */
	if (size <= 0) {
	    /* here a zero-length read is treated like an error return */
	    if (save_err == EIO || save_err == 0)
		NormalExit();
	    else if (!E_TEST(save_err))
		Panic("input: read returned unexpected error (%d)\n", save_err);
	    size = 0;
	}
#else /* !f*ugly */
	if (size < 0) {
	    if (save_err == EIO)
		NormalExit();
	    else if (!E_TEST(save_err))
		Panic("input: read returned unexpected error (%d)\n", save_err);
	    /* a "try again" error is reported as "nothing read" */
	    size = 0;
	} else if (size == 0) {
#if defined(__FreeBSD__)
	    NormalExit();
#else
	    Panic("input: read returned zero\n", 0);
#endif
	}
#endif /* f*ugly */
    }
#endif /* VMS */

    if (size) {
#if OPT_TRACE
	int i;

	/* hex-dump the new bytes, 16 per line */
	TRACE(("read %d bytes from pty\n", size));
	for (i = 0; i < size; i++) {
	    if (!(i % 16))
		TRACE(("%s", i ? "\n    " : "READ"));
	    TRACE((" %02X", data->last[i]));
	}
	TRACE(("\n"));
#endif
	data->last += size;
#ifdef ALLOWLOGGING
	/* NOTE(review): uses the globals 'term' and 'VTbuffer' rather than
	 * the 'xw' and 'data' parameters - presumably they are the same
	 * objects; confirm against the callers. */
	TScreenOf(term)->logstart = VTbuffer->next;
#endif
    }

    return (size);
}
314 
/*
 * Fetch the next character from the input buffer.  morePtyData() is always
 * called before this function, so when UTF-8 parsing is active the decoding
 * has already been done there and the stored result is simply collected with
 * skipPtyData().  Otherwise one byte is taken from the buffer, and reduced to
 * seven bits unless screen->output_eight_bits is set.
 */
#if OPT_WIDE_CHARS
IChar
nextPtyData(TScreen *screen, PtyData *data)
{
    IChar ch;

    if (!screen->utf8_inparse) {
	/* plain byte stream: take one byte and step past it */
	ch = *(data->next);
	data->next++;
	if (!screen->output_eight_bits)
	    ch = (IChar) (ch & 0x7f);
    } else {
	skipPtyData(data, ch);
    }
    TRACE2(("nextPtyData returns %#x\n", ch));
    return ch;
}
#endif
337 
338 #if OPT_WIDE_CHARS
339 /*
340  * Called when UTF-8 mode has been turned on/off.
341  */
342 void
switchPtyData(TScreen * screen,int flag)343 switchPtyData(TScreen *screen, int flag)
344 {
345     if (screen->utf8_mode != flag) {
346 	screen->utf8_mode = flag;
347 	screen->utf8_inparse = (Boolean) (flag != 0);
348 	mk_wcwidth_init(screen->utf8_mode);
349 
350 	TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
351 	update_font_utf8_mode();
352     }
353 }
354 #endif
355 
356 /*
357  * Allocate a buffer.
358  */
359 void
initPtyData(PtyData ** result)360 initPtyData(PtyData **result)
361 {
362     PtyData *data;
363 
364     TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
365 	    FRG_SIZE, BUF_SIZE));
366 
367     if (FRG_SIZE < 64)
368 	FRG_SIZE = 64;
369     if (BUF_SIZE < FRG_SIZE)
370 	BUF_SIZE = FRG_SIZE;
371     if (BUF_SIZE % FRG_SIZE)
372 	BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
373 
374     TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
375 	    FRG_SIZE, BUF_SIZE));
376 
377     data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
378 
379     memset(data, 0, sizeof(*data));
380     data->next = data->buffer;
381     data->last = data->buffer;
382     *result = data;
383 }
384 
/*
 * Set up the caller's PtyData 'result' to describe the caller's own bytes:
 * the structure is zeroed, then pointed at the range 'next' up to 'last'.
 * Returns 'result'.
 */
#if OPT_WIDE_CHARS
PtyData *
fakePtyData(PtyData *result, Char *next, Char *last)
{
    memset(result, 0, sizeof(*result));
    result->next = next;
    result->last = last;

    return result;
}
#endif
401 
402 /*
403  * Remove used data by shifting the buffer down, to make room for more data,
404  * e.g., a continuation-read.
405  */
406 void
trimPtyData(XtermWidget xw,PtyData * data)407 trimPtyData(XtermWidget xw, PtyData *data)
408 {
409     (void) xw;
410     FlushLog(xw);
411 
412     if (data->next != data->buffer) {
413 	int i;
414 	int n = (int) (data->last - data->next);
415 
416 	TRACE(("shifting buffer down by %d\n", n));
417 	for (i = 0; i < n; ++i) {
418 	    data->buffer[i] = data->next[i];
419 	}
420 	data->next = data->buffer;
421 	data->last = data->next + n;
422     }
423 
424 }
425 
426 /*
427  * Insert new data into the input buffer so the next calls to morePtyData()
428  * and nextPtyData() will return that.
429  */
430 void
fillPtyData(XtermWidget xw,PtyData * data,const char * value,int length)431 fillPtyData(XtermWidget xw, PtyData *data, const char *value, int length)
432 {
433     int size;
434     int n;
435 
436     /* remove the used portion of the buffer */
437     trimPtyData(xw, data);
438 
439     VTbuffer->last += length;
440     size = (int) (VTbuffer->last - VTbuffer->next);
441 
442     /* shift the unused portion up to make room */
443     for (n = size; n >= length; --n)
444 	VTbuffer->next[n] = VTbuffer->next[n - length];
445 
446     /* insert the new bytes to interpret */
447     for (n = 0; n < length; n++)
448 	VTbuffer->next[n] = CharOf(value[n]);
449 }
450 
451 #if OPT_WIDE_CHARS
452 /*
453  * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
454  * 'lp', and returning a pointer past the converted character.
455  */
456 Char *
convertToUTF8(Char * lp,unsigned c)457 convertToUTF8(Char *lp, unsigned c)
458 {
459 #define CH(n) (Char)((c) >> ((n) * 8))
460     if (c < 0x80) {
461 	/*  0*******  */
462 	*lp++ = (Char) CH(0);
463     } else if (c < 0x800) {
464 	/*  110***** 10******  */
465 	*lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
466 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
467     } else if (c < 0x00010000) {
468 	/*  1110**** 10****** 10******  */
469 	*lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
470 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
471 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
472     } else if (c < 0x00200000) {
473 	*lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
474 	*lp++ = (Char) (0x80 |
475 			((int) (CH(1) & 0xf0) >> 4) |
476 			((int) (CH(2) & 0x03) << 4));
477 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
478 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
479     } else if (c < 0x04000000) {
480 	*lp++ = (Char) (0xf8 | (CH(3) & 0x03));
481 	*lp++ = (Char) (0x80 | (CH(2) >> 2));
482 	*lp++ = (Char) (0x80 |
483 			((int) (CH(1) & 0xf0) >> 4) |
484 			((int) (CH(2) & 0x03) << 4));
485 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
486 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
487     } else {
488 	*lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
489 	*lp++ = (Char) (0x80 | (CH(3) & 0x3f));
490 	*lp++ = (Char) (0x80 | (CH(2) >> 2));
491 	*lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
492 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
493 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
494     }
495     return lp;
496 #undef CH
497 }
498 
499 /*
500  * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
501  * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
502  * making it possible to use this to convert to Latin-1.
503  *
504  * If the conversion fails, return null.
505  */
506 Char *
convertFromUTF8(Char * lp,unsigned * cp)507 convertFromUTF8(Char *lp, unsigned *cp)
508 {
509     int want;
510 
511     /*
512      * Find the number of bytes we will need from the source.
513      */
514     if ((*lp & 0x80) == 0) {
515 	want = 1;
516     } else if ((*lp & 0xe0) == 0xc0) {
517 	want = 2;
518     } else if ((*lp & 0xf0) == 0xe0) {
519 	want = 3;
520     } else if ((*lp & 0xf8) == 0xf0) {
521 	want = 4;
522     } else if ((*lp & 0xfc) == 0xf8) {
523 	want = 5;
524     } else if ((*lp & 0xfe) == 0xfc) {
525 	want = 6;
526     } else {
527 	want = 0;
528     }
529 
530     if (want) {
531 	int have = 1;
532 
533 	while (lp[have] != '\0') {
534 	    if ((lp[have] & 0xc0) != 0x80)
535 		break;
536 	    ++have;
537 	}
538 	if (want == have) {
539 	    unsigned mask = 0;
540 	    int j;
541 	    int shift = 0;
542 
543 	    *cp = 0;
544 	    switch (want) {
545 	    case 1:
546 		mask = (*lp);
547 		break;
548 	    case 2:
549 		mask = (*lp & 0x1f);
550 		break;
551 	    case 3:
552 		mask = (*lp & 0x0f);
553 		break;
554 	    case 4:
555 		mask = (*lp & 0x07);
556 		break;
557 	    case 5:
558 		mask = (*lp & 0x03);
559 		break;
560 	    case 6:
561 		mask = (*lp & 0x01);
562 		break;
563 	    default:
564 		mask = 0;
565 		break;
566 	    }
567 
568 	    for (j = 1; j < want; j++) {
569 		*cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
570 		shift += 6;
571 	    }
572 	    *cp |= mask << shift;
573 	    lp += want;
574 	} else {
575 	    *cp = BAD_ASCII;
576 	    lp = NULL;
577 	}
578     } else {
579 	*cp = BAD_ASCII;
580 	lp = NULL;
581     }
582     return lp;
583 }
584 
585 /*
586  * Returns true if the entire string is valid UTF-8.
587  */
588 Boolean
isValidUTF8(Char * lp)589 isValidUTF8(Char *lp)
590 {
591     Boolean result = True;
592     while (*lp) {
593 	unsigned ch;
594 	Char *next = convertFromUTF8(lp, &ch);
595 	if (next == NULL || ch == 0) {
596 	    result = False;
597 	    break;
598 	}
599 	lp = next;
600     }
601     return result;
602 }
603 
604 /*
605  * Write data back to the PTY
606  */
607 void
writePtyData(int f,IChar * d,unsigned len)608 writePtyData(int f, IChar *d, unsigned len)
609 {
610     unsigned n = (len << 1);
611 
612     if (VTbuffer->write_len <= len) {
613 	VTbuffer->write_len = n;
614 	VTbuffer->write_buf = (Char *) XtRealloc((char *)
615 						 VTbuffer->write_buf, VTbuffer->write_len);
616     }
617 
618     for (n = 0; n < len; n++)
619 	VTbuffer->write_buf[n] = (Char) d[n];
620 
621     TRACE(("writePtyData %u:%s\n", n,
622 	   visibleChars(VTbuffer->write_buf, n)));
623     v_write(f, VTbuffer->write_buf, n);
624 }
625 #endif /* OPT_WIDE_CHARS */
626 
627 #ifdef NO_LEAKS
628 void
noleaks_ptydata(void)629 noleaks_ptydata(void)
630 {
631     if (VTbuffer != 0) {
632 #if OPT_WIDE_CHARS
633 	free(VTbuffer->write_buf);
634 #endif
635 	FreeAndNull(VTbuffer);
636     }
637 }
638 #endif
639 
640 #ifdef TEST_DRIVER
641 
642 #include "data.c"
643 
/*
 * Test-driver replacement:  report the call on stderr and exit successfully.
 */
void
NormalExit(void)
{
    fputs("NormalExit!\n", stderr);
    exit(EXIT_SUCCESS);
}
650 
/*
 * Test-driver replacement:  report the call on stderr and exit with failure.
 * The message 's' and its parameter 'a' are ignored.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;
    fputs("Panic!\n", stderr);
    exit(EXIT_FAILURE);
}
659 
660 #if OPT_WIDE_CHARS
661 
662 #ifdef ALLOWLOGGING
/* Test-driver stub:  there is no log to flush, so this does nothing. */
void
FlushLog(XtermWidget xw)
{
    (void) xw;
}
668 #endif
669 
/* Test-driver stub:  the data is discarded rather than written anywhere. */
void
v_write(int f, const Char *data, unsigned len)
{
    (void) f;
    (void) data;
    (void) len;
}
677 
/* Test-driver stub:  accepts the mode and does nothing with it. */
void
mk_wcwidth_init(int mode)
{
    (void) mode;
}
683 
/* Test-driver stub:  there is no font menu to update, so this does nothing. */
void
update_font_utf8_mode(void)
{
}
688 
/* test-driver state, set from the command-line options in main() */
static int message_level = 0;	/* raised by -v, lowered by -q */
static int opt_all = 0;		/* -a: run test_utf8_convert() */
static int opt_illegal = 0;	/* -i: skip illegal UTF-8 in the -r test */
static int opt_convert = 0;	/* -c: test with convertFromUTF8() */
static int opt_reverse = 0;	/* -r: parameters are UTF-8 byte-strings */
static long total_test = 0;	/* tests counted by do_range() */
static long total_errs = 0;	/* failures counted by do_range() */
696 
/*
 * Print the usage message on stderr, one line at a time, and exit with
 * failure.
 */
static void
usage(void)
{
    static const char *const help[] =
    {
	"Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]",
	"",
	"Options:",
	" -a  exercise all legal encode/decode to/from UTF-8",
	" -c  call convertFromUTF8 rather than decodeUTF8",
	" -i  ignore illegal UTF-8 when testing -r option",
	" -q  quieter",
	" -r  reverse/decode from UTF-8 byte-string to/from Unicode",
	" -v  more verbose"
    };
    const size_t count = sizeof(help) / sizeof(help[0]);
    size_t line;

    for (line = 0; line < count; ++line) {
	fputs(help[line], stderr);
	fputc('\n', stderr);
    }
    exit(EXIT_FAILURE);
}
718 
719 /*
720  * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
721  */
722 #define OkRange(n,lo,hi) \
723  	if (value[n] < lo || value[n] > hi) { \
724 	    result = False; \
725 	    break; \
726 	}
727 static Bool
is_legal_utf8(const Char * value)728 is_legal_utf8(const Char *value)
729 {
730     Bool result = True;
731     Char ch;
732     while ((ch = *value) != '\0') {
733 	if (ch <= 0x7f) {
734 	    ++value;
735 	} else if (ch >= 0xc2 && ch <= 0xdf) {
736 	    OkRange(1, 0x80, 0xbf);
737 	    value += 2;
738 	} else if (ch == 0xe0) {
739 	    OkRange(1, 0xa0, 0xbf);
740 	    OkRange(2, 0x80, 0xbf);
741 	    value += 3;
742 	} else if (ch >= 0xe1 && ch <= 0xef) {
743 	    OkRange(1, 0x80, 0xbf);
744 	    OkRange(2, 0x80, 0xbf);
745 	    value += 3;
746 	} else if (ch == 0xf0) {
747 	    OkRange(1, 0x90, 0xbf);
748 	    OkRange(2, 0x80, 0xbf);
749 	    OkRange(3, 0x80, 0xbf);
750 	    value += 4;
751 	} else if (ch >= 0xf1 && ch <= 0xf3) {
752 	    OkRange(1, 0x80, 0xbf);
753 	    OkRange(2, 0x80, 0xbf);
754 	    OkRange(3, 0x80, 0xbf);
755 	    value += 4;
756 	} else if (ch == 0xf4) {
757 	    OkRange(1, 0x80, 0x8f);
758 	    OkRange(2, 0x80, 0xbf);
759 	    OkRange(3, 0x80, 0xbf);
760 	    value += 4;
761 	} else {
762 	    result = False;
763 	    break;
764 	}
765     }
766     return result;
767 }
768 
769 static void
test_utf8_convert(void)770 test_utf8_convert(void)
771 {
772     unsigned c_in, c_out;
773     Char buffer[10];
774     Char *result;
775     unsigned limit = 0x110000;
776     unsigned success = 0;
777     unsigned bucket[256];
778 
779     memset(bucket, 0, sizeof(bucket));
780     for (c_in = 0; c_in < limit; ++c_in) {
781 	memset(buffer, 0, sizeof(buffer));
782 	if ((result = convertToUTF8(buffer, c_in)) == 0) {
783 	    TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
784 	} else {
785 	    if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
786 		TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
787 	    } else if (c_in != c_out) {
788 		TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
789 		       c_in, c_out));
790 	    } else {
791 		while (result-- != buffer) {
792 		    bucket[*result]++;
793 		}
794 		++success;
795 	    }
796 	}
797     }
798     TRACE(("%u/%u successful\n", success, limit));
799     for (c_in = 0; c_in < 256; ++c_in) {
800 	if ((c_in % 8) == 0) {
801 	    TRACE((" %02X:", c_in));
802 	}
803 	TRACE((" %8X", bucket[c_in]));
804 	if (((c_in + 1) % 8) == 0) {
805 	    TRACE(("\n"));
806 	}
807     }
808 }
809 
/*
 * Decode one number from the start of 'source', storing in '*target' the
 * address of the first character which was not used.
 *
 * A "U+" or "u+" prefix selects hexadecimal and a "0b" prefix selects binary.
 * Otherwise strtol() chooses the radix from the usual C prefixes.
 *
 * Returns the value, or -1 if no digits were found or if the value is not in
 * the range 0 to INT_MAX.  Values which are too large are rejected rather than
 * being silently truncated to an int.
 */
static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
	source += 2;
	radix = 2;
    }

    errno = 0;			/* strtol reports overflow only via errno */
    check = strtol(source, target, radix);
    if (*target != NULL
	&& *target != source
	&& errno != ERANGE
	&& check >= 0
	&& check <= (long) INT_MAX) {
	result = (int) check;
    }
    return result;
}
828 
/*
 * Decode a range "lo" or "lo-hi" from 'source', using decode_one() for each
 * number.  The numbers may be separated by any mix of ':', '-', '.', tab and
 * space.  If there is no usable second number, '*hi' is set to '*lo'.
 *
 * Returns 1 on success, or 0 if the first number cannot be decoded (in that
 * case '*hi' is not modified).
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *unused;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
	return 0;

    rest += strspn(rest, ":-.\t ");
    *hi = decode_one(rest, &unused);
    if (*hi < 0)
	*hi = *lo;
    return 1;
}
844 
845 #define MAX_BYTES 6
846 
847 static void
do_range(const char * source)848 do_range(const char *source)
849 {
850     int lo, hi;
851 
852     TScreen screen;
853     memset(&screen, 0, sizeof(screen));
854 
855     if (decode_range(source, &lo, &hi)) {
856 	while (lo <= hi) {
857 	    unsigned c_in = (unsigned) lo++;
858 	    PtyData *data;
859 	    Char *next;
860 	    Char buffer[MAX_BYTES + 1];
861 
862 	    if (opt_reverse) {
863 		Bool skip = False;
864 		Bool first = True;
865 		int j, k;
866 		for (j = 0; j < MAX_BYTES; ++j) {
867 		    unsigned long bits = ((unsigned long) c_in >> (8 * j));
868 		    if ((buffer[j] = (Char) bits) == 0) {
869 			skip = (bits != 0);
870 			break;
871 		    }
872 		}
873 		if (skip)
874 		    continue;
875 		initPtyData(&data);
876 		for (k = 0; k <= j; ++k) {
877 		    data->buffer[k] = buffer[j - k - 1];
878 		}
879 		if (opt_illegal && !is_legal_utf8(data->buffer)) {
880 		    free(data);
881 		    continue;
882 		}
883 		if (message_level > 1) {
884 		    printf("TEST ");
885 		    for (k = 0; k < j; ++k) {
886 			printf("%02X", data->buffer[k]);
887 		    }
888 		}
889 		data->next = data->buffer;
890 		data->last = data->buffer + j;
891 		while (decodeUtf8(&screen, data)) {
892 		    total_test++;
893 		    if (data->utf_data == UCS_REPL)
894 			total_errs++;
895 		    data->next += data->utf_size;
896 		    if (message_level > 1) {
897 			printf("%s%04X", first ? " ->" : ", ", data->utf_data);
898 		    }
899 		    first = False;
900 		}
901 		if (!first)
902 		    total_test--;
903 		if (message_level > 1) {
904 		    printf("\n");
905 		    fflush(stdout);
906 		}
907 		free(data);
908 	    } else if (opt_convert) {
909 		unsigned c_out;
910 		Char *result;
911 
912 		memset(buffer, 0, sizeof(buffer));
913 		if ((result = next = convertToUTF8(buffer, c_in)) == 0) {
914 		    fprintf(stderr,
915 			    "conversion of U+%04X to UTF-8 failed\n", c_in);
916 		} else if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
917 		    fprintf(stderr,
918 			    "conversion of U+%04X from UTF-8 failed\n", c_in);
919 		    total_errs++;
920 		} else if (c_in != c_out) {
921 		    fprintf(stderr,
922 			    "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
923 			    c_in, c_out);
924 		} else if (message_level > 1) {
925 		    *next = '\0';
926 		    printf("TEST %04X (%d:%s) ->%04X\n", c_in,
927 			   (int) (next - buffer),
928 			   buffer,
929 			   c_out);
930 		    fflush(stdout);
931 		}
932 	    } else {
933 		initPtyData(&data);
934 		next = convertToUTF8(data->buffer, c_in);
935 		*next = 0;
936 		data->next = data->buffer;
937 		data->last = next;
938 		decodeUtf8(&screen, data);
939 		if (message_level > 1) {
940 		    printf("TEST %04X (%d:%s) ->%04X\n", c_in,
941 			   (int) (next - data->buffer),
942 			   data->buffer,
943 			   data->utf_data);
944 		    fflush(stdout);
945 		}
946 		if (c_in != data->utf_data) {
947 		    fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
948 		    total_errs++;
949 		}
950 		free(data);
951 	    }
952 	    total_test++;
953 	}
954     }
955 }
956 
957 int
main(int argc,char ** argv)958 main(int argc, char **argv)
959 {
960     int ch;
961 
962     setlocale(LC_ALL, "");
963     while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
964 	switch (ch) {
965 	case 'a':
966 	    opt_all = 1;
967 	    break;
968 	case 'c':
969 	    opt_convert = 1;
970 	    break;
971 	case 'i':
972 	    opt_illegal = 1;
973 	    break;
974 	case 'q':
975 	    message_level--;
976 	    break;
977 	case 'r':
978 	    opt_reverse = 1;
979 	    break;
980 	case 'v':
981 	    message_level++;
982 	    break;
983 	default:
984 	    usage();
985 	}
986     }
987     if (opt_all) {
988 	test_utf8_convert();
989     } else {
990 	if (optind >= argc)
991 	    usage();
992 	while (optind < argc) {
993 	    do_range(argv[optind++]);
994 	}
995 	if (total_test) {
996 	    printf("%ld/%ld mismatches (%.0f%%)\n",
997 		   total_errs,
998 		   total_test,
999 		   (100.0 * (double) total_errs) / (double) total_test);
1000 	}
1001     }
1002     return EXIT_SUCCESS;
1003 }
1004 #else
/*
 * Test-driver entry point for builds without wide-character support:  there
 * is nothing to test, so just say so.
 */
int
main(int argc, char **argv)
{
    (void) argv;
    (void) argc;
    puts("Nothing to be done here...");
    return EXIT_SUCCESS;
}
1013 #endif /* OPT_WIDE_CHARS */
1014 #endif
1015