1 /* $XTermId: iso2022.c,v 1.40 2018/06/27 20:41:53 tom Exp $ */
2 
3 /*
4 Copyright 2011-2013,2018 by Thomas E. Dickey
5 Copyright (c) 2001 by Juliusz Chroboczek
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 THE SOFTWARE.
24 */
25 
26 #include <iso2022.h>
27 
28 #include <unistd.h>
29 #include <errno.h>
30 
31 #include <sys.h>
32 
/*
 * Holding area for the bytes of a UTF-8 sequence that copyIn() has started
 * but not yet finished decoding.  Being static, the contents survive from
 * one copyIn() call to the next.
 */
#define BUFFERED_INPUT_SIZE 4
static unsigned char buffered_input[BUFFERED_INPUT_SIZE];
static int buffered_input_count = 0;

/* Both are defined after copyOut(), which calls terminate(). */
static void terminateEsc(Iso2022Ptr, int, unsigned char *, unsigned);
static void terminate(Iso2022Ptr, int);
39 
/* True when `count` more bytes fit in the output buffer as it stands. */
#define OUTBUF_FREE(is, count) ((is)->outbuf_count + (count) <= BUFFER_SIZE)

/*
 * Flush the output buffer to `fd` if `count` more bytes would not fit.
 * The do/while(0) wrapper makes the expansion a single statement, so the
 * macro can be used as the body of an if/else without capturing the else.
 */
#define OUTBUF_MAKE_FREE(is, fd, count) \
    do { \
	if (!OUTBUF_FREE((is), (count))) \
	    outbuf_flush((is), (fd)); \
    } while (0)
43 
44 #ifdef OPT_TRACE
45 static void
trace_charset(const char * tag,const CharsetRec * ptr)46 trace_charset(const char *tag, const CharsetRec * ptr)
47 {
48     if (ptr != NULL) {
49 	TRACE(("%s:", NonNull(tag)));
50 	TRACE((" name:%s", NonNull(ptr->name)));
51 	TRACE((" type:%d", ptr->type));
52 	if (ptr->final)
53 	    TRACE((" final:%c", ptr->final));
54 	if (ptr->data != NULL)
55 	    TRACE((" data"));
56 	if (ptr->recode != NULL)
57 	    TRACE((" recode"));
58 	if (ptr->reverse != NULL)
59 	    TRACE((" reverse"));
60 	if (ptr->other_stack != NULL)
61 	    TRACE((" other_stack"));
62 	if (ptr->other_aux != NULL)
63 	    TRACE((" other_aux"));
64 	if (ptr->other_recode != NULL)
65 	    TRACE((" other_recode"));
66 	if (ptr->other_reverse != NULL)
67 	    TRACE((" other_reverse"));
68 	TRACE(("\n"));
69     }
70 }
71 
/*
 * Trace the whole ISO 2022 state: a heading line with the tag, followed by
 * one trace_charset() line for each of GL, GR, G0..G3 and OTHER.  Entries
 * that are null are omitted by trace_charset().
 */
static void
trace_iso2022(const char *tag, const Iso2022Ptr ptr)
{
    TRACE(("%s:\n", NonNull(tag)));
    trace_charset("\tGL()", GL(ptr));
    trace_charset("\tGR()", GR(ptr));
    trace_charset("\tG0()", G0(ptr));
    trace_charset("\tG1()", G1(ptr));
    trace_charset("\tG2()", G2(ptr));
    trace_charset("\tG3()", G3(ptr));
    trace_charset("\tOTHER()", OTHER(ptr));
}
84 
85 #else
86 #define trace_iso2022(tag, ptr)	/* nothing */
87 #endif
88 
89 static void
outbuf_flush(Iso2022Ptr is,int fd)90 outbuf_flush(Iso2022Ptr is, int fd)
91 {
92     int rc;
93     unsigned i = 0;
94 
95     if (olog >= 0)
96 	IGNORE_RC(write(olog, is->outbuf, is->outbuf_count));
97 
98     while (i < is->outbuf_count) {
99 	rc = (int) write(fd, is->outbuf + i, is->outbuf_count - i);
100 	if (rc > 0) {
101 	    i += (unsigned) rc;
102 	} else {
103 	    if (rc < 0 && errno == EINTR)
104 		continue;
105 	    else if ((rc == 0) || ((rc < 0) && (errno == EAGAIN))) {
106 		if (waitForOutput(fd) == IO_Closed)
107 		    break;
108 		continue;
109 	    } else
110 		break;
111 	}
112     }
113     is->outbuf_count = 0;
114 }
115 
116 static void
outbufOne(Iso2022Ptr is,int fd,unsigned c)117 outbufOne(Iso2022Ptr is, int fd, unsigned c)
118 {
119     OUTBUF_MAKE_FREE(is, fd, 1);
120     is->outbuf[is->outbuf_count++] = UChar(c);
121 }
122 
123 /* Discards null codepoints */
124 static void
outbufUTF8(Iso2022Ptr is,int fd,unsigned c)125 outbufUTF8(Iso2022Ptr is, int fd, unsigned c)
126 {
127     if (c == 0)
128 	return;
129 
130     if (c <= 0x7F) {
131 	OUTBUF_MAKE_FREE(is, fd, 1);
132 	is->outbuf[is->outbuf_count++] = UChar(c);
133     } else if (c <= 0x7FF) {
134 	OUTBUF_MAKE_FREE(is, fd, 2);
135 	is->outbuf[is->outbuf_count++] = UChar(0xC0 | ((c >> 6) & 0x1F));
136 	is->outbuf[is->outbuf_count++] = UChar(0x80 | (c & 0x3F));
137     } else {
138 	OUTBUF_MAKE_FREE(is, fd, 3);
139 	is->outbuf[is->outbuf_count++] = UChar(0xE0 | ((c >> 12) & 0x0F));
140 	is->outbuf[is->outbuf_count++] = UChar(0x80 | ((c >> 6) & 0x3F));
141 	is->outbuf[is->outbuf_count++] = UChar(0x80 | (c & 0x3F));
142     }
143 }
144 
145 static void
buffer(Iso2022Ptr is,unsigned c)146 buffer(Iso2022Ptr is, unsigned c)
147 {
148     if (is->buffered == NULL) {
149 	is->buffered_len = 10;
150 	is->buffered = malloc(is->buffered_len);
151 	if (is->buffered == NULL)
152 	    FatalError("Couldn't allocate buffered.\n");
153     }
154 
155     if (is->buffered_count >= is->buffered_len) {
156 	is->buffered = realloc(is->buffered, 2 * is->buffered_len + 1);
157 	if (is->buffered == NULL) {
158 	    FatalError("Couldn't grow buffered.\n");
159 	}
160 	is->buffered_len = 2 * is->buffered_len + 1;
161     }
162 
163     is->buffered[is->buffered_count++] = UChar(c);
164 }
165 
166 static void
outbuf_buffered_carefully(Iso2022Ptr is,int fd)167 outbuf_buffered_carefully(Iso2022Ptr is, int fd)
168 {
169     /* This should never happen in practice */
170     unsigned i = 0;
171 
172     while (i < is->buffered_count) {
173 	OUTBUF_MAKE_FREE(is, fd, 1);
174 	is->outbuf[is->outbuf_count++] = is->buffered[i++];
175     }
176     is->buffered_count = 0;
177 }
178 
179 static void
outbuf_buffered(Iso2022Ptr is,int fd)180 outbuf_buffered(Iso2022Ptr is, int fd)
181 {
182     if (is->buffered_count > BUFFER_SIZE)
183 	outbuf_buffered_carefully(is, fd);
184 
185     OUTBUF_MAKE_FREE(is, fd, is->buffered_count);
186     memcpy(is->outbuf + is->outbuf_count, is->buffered, is->buffered_count);
187     is->outbuf_count += is->buffered_count;
188     is->buffered_count = 0;
189 }
190 
/*
 * Forget the buffered bytes without emitting them.  The array itself is
 * kept for reuse; only the count is reset.
 */
static void
discard_buffered(Iso2022Ptr is)
{
    is->buffered_count = 0;
}
196 
197 Iso2022Ptr
allocIso2022(void)198 allocIso2022(void)
199 {
200     Iso2022Ptr is;
201     is = TypeCalloc(Iso2022Rec);
202     if (!is)
203 	return NULL;
204     is->glp = is->grp = NULL;
205     G0(is) = G1(is) = G2(is) = G3(is) = OTHER(is) = NULL;
206 
207     is->parserState = P_NORMAL;
208     is->shiftState = S_NORMAL;
209 
210     is->inputFlags = IF_EIGHTBIT | IF_SS | IF_SSGR;
211     is->outputFlags = OF_SS | OF_LS | OF_SELECT;
212 
213     is->buffered = NULL;
214     is->buffered_len = 0;
215     is->buffered_count = 0;
216 
217     is->buffered_ku = -1;
218 
219     is->outbuf = malloc((size_t) BUFFER_SIZE);
220     if (!is->outbuf) {
221 	free(is);
222 	return NULL;
223     }
224     is->outbuf_count = 0;
225 
226     return is;
227 }
228 
229 #ifdef NO_LEAKS
230 void
destroyIso2022(Iso2022Ptr is)231 destroyIso2022(Iso2022Ptr is)
232 {
233     if (is->buffered)
234 	free(is->buffered);
235     if (is->outbuf)
236 	free(is->outbuf);
237     free(is);
238 }
239 #endif
240 
241 static int
identifyCharset(Iso2022Ptr i,const CharsetRec ** p)242 identifyCharset(Iso2022Ptr i, const CharsetRec * *p)
243 {
244     if (p == &G0(i)) {
245 	return 0;
246     } else if (p == &G1(i)) {
247 	return 1;
248     } else if (p == &G2(i)) {
249 	return 2;
250     } else if (p == &G3(i)) {
251 	return 3;
252     } else {
253 	abort();
254 	/* NOTREACHED */
255     }
256 }
257 
258 #define G_name(n) ((i != 0 && i->g[n] != 0) ? NonNull(i->g[n]->name) : "unset")
259 
260 void
reportIso2022(const char * tag,Iso2022Ptr i)261 reportIso2022(const char *tag, Iso2022Ptr i)
262 {
263     Message("%s: ", tag);
264     if (OTHER(i) != NULL) {
265 	Message("%s, non-ISO-2022 encoding.\n", OTHER(i)->name);
266 	return;
267     }
268     Message("G0 is %s, ", G_name(0));
269     Message("G1 is %s, ", G_name(1));
270     Message("G2 is %s, ", G_name(2));
271     Message("G3 is %s.\n", G_name(3));
272     Message("GL is G%d, ", identifyCharset(i, i->glp));
273     Message("GR is G%d.\n", identifyCharset(i, i->grp));
274 }
275 
276 int
initIso2022(const char * locale,const char * charset,Iso2022Ptr i)277 initIso2022(const char *locale, const char *charset, Iso2022Ptr i)
278 {
279     int gl = 0, gr = 2;
280     const CharsetRec *g0 = NULL;
281     const CharsetRec *g1 = NULL;
282     const CharsetRec *g2 = NULL;
283     const CharsetRec *g3 = NULL;
284     const CharsetRec *other = NULL;
285     int rc;
286 
287     TRACE(("initIso2022(locale=%s, charset=%s)\n", NonNull(locale), NonNull(charset)));
288     rc = getLocaleState(locale, charset, &gl, &gr, &g0, &g1, &g2, &g3, &other);
289     if (rc < 0) {
290 	if (charset) {
291 	    Warning("couldn't find charset %s; "
292 		    "using ISO 8859-1.\n", charset);
293 	} else if (ignore_locale) {
294 	    Warning("couldn't find charset data for %s; "
295 		    "using ISO 8859-1.\n", locale);
296 	} else {
297 	    Warning("couldn't find charset data for locale %s; "
298 		    "using ISO 8859-1.\n", locale);
299 	}
300     }
301 
302     if (G0(i) == NULL) {
303 	if (g0)
304 	    G0(i) = g0;
305 	else
306 	    G0(i) = getCharsetByName("ASCII");
307     }
308 
309     if (G1(i) == NULL) {
310 	if (g1)
311 	    G1(i) = g1;
312 	else
313 	    G1(i) = getUnknownCharset(T_94);
314     }
315 
316     if (G2(i) == NULL) {
317 	if (g2)
318 	    G2(i) = g2;
319 	else
320 	    G2(i) = getCharsetByName("ISO 8859-1");
321     }
322 
323     if (G3(i) == NULL) {
324 	if (g3)
325 	    G3(i) = g3;
326 	else
327 	    G3(i) = getUnknownCharset(T_94);
328     }
329 
330     if (OTHER(i) == NULL) {
331 	if (other)
332 	    OTHER(i) = other;
333 	else
334 	    OTHER(i) = NULL;
335     }
336 
337     if (i->glp == NULL) {
338 	i->glp = &i->g[gl];
339     }
340 
341     if (i->grp == NULL) {
342 	i->grp = &i->g[gr];
343     }
344     trace_iso2022("...initIso2022", i);
345     return 0;
346 }
347 
348 int
mergeIso2022(Iso2022Ptr d,Iso2022Ptr s)349 mergeIso2022(Iso2022Ptr d, Iso2022Ptr s)
350 {
351     if (G0(d) == NULL)
352 	G0(d) = G0(s);
353     if (G1(d) == NULL)
354 	G1(d) = G1(s);
355     if (G2(d) == NULL)
356 	G2(d) = G2(s);
357     if (G3(d) == NULL)
358 	G3(d) = G3(s);
359     if (OTHER(d) == NULL)
360 	OTHER(d) = OTHER(s);
361     if (d->glp == NULL)
362 	d->glp = &(d->g[identifyCharset(s, s->glp)]);
363     if (d->grp == NULL)
364 	d->grp = &(d->g[identifyCharset(s, s->grp)]);
365     trace_iso2022("...mergeIso2022", d);
366     return 0;
367 }
368 
/*
 * Return the total length in bytes of the UTF-8 sequence introduced by the
 * lead byte c.  ASCII bytes, stray continuation bytes and bytes that cannot
 * start a sequence of two to four bytes all count as 1.
 *
 * No result may exceed BUFFERED_INPUT_SIZE.
 */
static int
utf8Count(unsigned c)
{
    if ((c & 0xE0) == 0xC0)
	return 2;		/* 110xxxxx */
    if ((c & 0xF0) == 0xE0)
	return 3;		/* 1110xxxx */
    if ((c & 0xF8) == 0xF0)
	return 4;		/* 11110xxx */

    /* 0xxxxxxx, or incorrect UTF-8 (10xxxxxx, 11111xxx) */
    return 1;
}
386 
/*
 * Decode the UTF-8 sequence starting at b and return its code point.
 *
 * The lead byte b[0] decides how many bytes are read (1 to 4); the caller
 * must make sure that many bytes are available.  Continuation bytes are
 * not validated, only masked.  Returns -1 when b[0] is a continuation byte
 * or cannot start a sequence of at most four bytes.
 */
static int
fromUtf8(unsigned char *b)
{
    if ((b[0] & 0x80) == 0)
	return b[0];
    else if ((b[0] & 0x40) == 0)
	return -1;		/* incorrect UTF-8 */
    else if ((b[0] & 0x60) == 0x40)
	return ((b[0] & 0x1F) << 6) | (b[1] & 0x3F);
    else if ((b[0] & 0x70) == 0x60)
	return (((b[0] & 0x0F) << 12) |
		((b[1] & 0x3F) << 6) |
		((b[2] & 0x3F)));
    else if ((b[0] & 0x78) == 0x70)
	/* 11110xxx carries three payload bits, needed for U+100000 and up */
	return (((b[0] & 0x07) << 18) |
		((b[1] & 0x3F) << 12) |
		((b[2] & 0x3F) << 6) |
		((b[3] & 0x3F)));
    else
	return -1;
}
408 
409 void
copyIn(Iso2022Ptr is,int fd,unsigned char * buf,int count)410 copyIn(Iso2022Ptr is, int fd, unsigned char *buf, int count)
411 {
412     unsigned char *c;
413     int codepoint, rem;
414 
415     c = buf;
416     rem = count;
417 
418 #define NEXT do {c++; rem--;} while(0)
419 
420     while (rem > 0) {
421 	codepoint = -1;
422 	if (is->parserState == P_ESC) {
423 	    assert(buffered_input_count == 0);
424 	    codepoint = *c;
425 	    NEXT;
426 	    if (*c == CSI_7)
427 		is->parserState = P_CSI;
428 	    else if (IS_FINAL_ESC(codepoint))
429 		is->parserState = P_NORMAL;
430 	} else if (is->parserState == P_CSI) {
431 	    assert(buffered_input_count == 0);
432 	    codepoint = *c;
433 	    NEXT;
434 	    if (IS_FINAL_CSI(codepoint))
435 		is->parserState = P_NORMAL;
436 	} else if (!(*c & 0x80)) {
437 	    if (buffered_input_count > 0) {
438 		buffered_input_count = 0;
439 		continue;
440 	    } else {
441 		codepoint = *c;
442 		NEXT;
443 		if (codepoint == ESC)
444 		    is->parserState = P_ESC;
445 	    }
446 	} else if ((*c & 0x40)) {
447 	    if (buffered_input_count > 0) {
448 		buffered_input_count = 0;
449 		continue;
450 	    } else {
451 		buffered_input[buffered_input_count] = *c;
452 		buffered_input_count++;
453 		NEXT;
454 	    }
455 	} else {
456 	    if (buffered_input_count <= 0) {
457 		buffered_input_count = 0;
458 		NEXT;
459 		continue;
460 	    } else {
461 		buffered_input[buffered_input_count] = *c;
462 		buffered_input_count++;
463 		NEXT;
464 		if (buffered_input_count >= utf8Count(buffered_input[0])) {
465 		    codepoint = fromUtf8(buffered_input);
466 		    buffered_input_count = 0;
467 		    if (codepoint == CSI)
468 			is->parserState = P_CSI;
469 		}
470 	    }
471 	}
472 #undef NEXT
473 
474 	if (codepoint >= 0) {
475 	    int i;
476 	    unsigned ucode = (unsigned) codepoint;
477 	    unsigned char obuf[4];
478 
479 #define WRITE_1(i) do { \
480 	    obuf[0] = UChar(i); \
481 	    IGNORE_RC(write(fd, obuf, (size_t) 1)); \
482 	} while(0)
483 #define WRITE_2(i) do { \
484 	    obuf[0] = UChar(((i) >> 8) & 0xFF); \
485 	    obuf[1] = UChar((i) & 0xFF); \
486 	    IGNORE_RC(write(fd, obuf, (size_t) 2)); \
487 	} while(0)
488 
489 #define WRITE_3(i) do { \
490 	    obuf[0] = UChar(((i) >> 16) & 0xFF); \
491 	    obuf[1] = UChar(((i) >>  8) & 0xFF); \
492 	    obuf[2] = UChar((i) & 0xFF); \
493 	    IGNORE_RC(write(fd, obuf, (size_t) 3)); \
494 	} while(0)
495 
496 #define WRITE_4(i) do { \
497 	    obuf[0] = UChar(((i) >> 24) & 0xFF); \
498 	    obuf[1] = UChar(((i) >> 16) & 0xFF); \
499 	    obuf[2] = UChar(((i) >>  8) & 0xFF); \
500 	    obuf[3] = UChar((i) & 0xFF); \
501 	    IGNORE_RC(write(fd, obuf, (size_t) 4)); \
502        } while(0)
503 
504 #define WRITE_1_P_8bit(p, i) { \
505 	    obuf[0] = UChar(p); \
506 	    obuf[1] = UChar(i); \
507 	    IGNORE_RC(write(fd, obuf, (size_t) 2)); \
508 	}
509 
510 #define WRITE_1_P_7bit(p, i) { \
511 	    obuf[0] = ESC; \
512 	    obuf[1] = UChar((p) - 0x40); \
513 	    obuf[2] = UChar(i); \
514 	    IGNORE_RC(write(fd, obuf, (size_t) 3)); \
515 	}
516 
517 #define WRITE_1_P(p,i) do { \
518 	if(is->inputFlags & IF_EIGHTBIT) \
519 	    WRITE_1_P_8bit(p,i) else \
520 	    WRITE_1_P_7bit(p,i) \
521 	} while(0)
522 
523 #define WRITE_2_P_8bit(p, i) { \
524 	    obuf[0] = UChar(p); \
525 	    obuf[1] = UChar(((i) >> 8) & 0xFF); \
526 	    obuf[2] = UChar((i) & 0xFF); \
527 	    IGNORE_RC(write(fd, obuf, (size_t) 3)); \
528 	}
529 
530 #define WRITE_2_P_7bit(p, i) { \
531 	    obuf[0] = ESC; \
532 	    obuf[1] = UChar((p) - 0x40); \
533 	    obuf[2] = UChar(((i) >> 8) & 0xFF); \
534 	    obuf[3] = UChar((i) & 0xFF); \
535 	    IGNORE_RC(write(fd, obuf, (size_t) 4)); \
536 	}
537 
538 #define WRITE_2_P(p,i) do { \
539 	    if(is->inputFlags & IF_EIGHTBIT) \
540 		WRITE_2_P_8bit(p,i) \
541 	    else \
542 		WRITE_2_P_7bit(p,i) \
543 	} while(0)
544 
545 #define WRITE_1_P_S(p,i,s) do { \
546 	    obuf[0] = UChar(p); \
547 	    obuf[1] = UChar((i) & 0xFF); \
548 	    obuf[2] = UChar(s); \
549 	    IGNORE_RC(write(fd, obuf, (size_t) 3)); \
550 	} while(0)
551 
552 #define WRITE_2_P_S(p,i,s) do { \
553 	    obuf[0] = UChar(p); \
554 	    obuf[1] = UChar(((i) >> 8) & 0xFF); \
555 	    obuf[2] = UChar((i) & 0xFF); \
556 	    obuf[3] = UChar(s); \
557 	    IGNORE_RC(write(fd, obuf, (size_t) 4)); \
558 	} while(0)
559 
560 	    if (ucode < 0x20 ||
561 		(OTHER(is) == NULL && CHARSET_REGULAR(GR(is)) &&
562 		 (ucode >= 0x80 && ucode < 0xA0))) {
563 		WRITE_1(ucode);
564 		continue;
565 	    }
566 	    if (OTHER(is) != NULL
567 		&& OTHER(is)->other_reverse != NULL) {
568 		unsigned int c2;
569 		c2 = OTHER(is)->other_reverse(ucode, OTHER(is)->other_aux);
570 		if (c2 >> 24)
571 		    WRITE_4(c2);
572 		else if (c2 >> 16)
573 		    WRITE_3(c2);
574 		else if (c2 >> 8)
575 		    WRITE_2(c2);
576 		else if (c2)
577 		    WRITE_1(c2);
578 		continue;
579 	    }
580 	    i = (GL(is)->reverse) (ucode, GL(is));
581 	    if (i >= 0) {
582 		switch (GL(is)->type) {
583 		case T_94:
584 		case T_96:
585 		case T_128:
586 		    if (i >= 0x20)
587 			WRITE_1(i);
588 		    break;
589 		case T_9494:
590 		case T_9696:
591 		case T_94192:
592 		    if (i >= 0x2020)
593 			WRITE_2(i);
594 		    break;
595 		default:
596 		    abort();
597 		    /* NOTREACHED */
598 		}
599 		continue;
600 	    }
601 	    if (is->inputFlags & IF_EIGHTBIT) {
602 		i = GR(is)->reverse(ucode, GR(is));
603 		if (i >= 0) {
604 		    switch (GR(is)->type) {
605 		    case T_94:
606 		    case T_96:
607 		    case T_128:
608 			/* we allow C1 characters if T_128 in GR */
609 			WRITE_1(i | 0x80);
610 			break;
611 		    case T_9494:
612 		    case T_9696:
613 			WRITE_2(i | 0x8080);
614 			break;
615 		    case T_94192:
616 			WRITE_2(i | 0x8000);
617 			break;
618 		    default:
619 			abort();
620 			/* NOTREACHED */
621 		    }
622 		    continue;
623 		}
624 	    }
625 	    if (is->inputFlags & IF_SS) {
626 		i = G2(is)->reverse(ucode, G2(is));
627 		if (i >= 0) {
628 		    switch (GR(is)->type) {
629 		    case T_94:
630 		    case T_96:
631 		    case T_128:
632 			if (i >= 0x20) {
633 			    if ((is->inputFlags & IF_EIGHTBIT) &&
634 				(is->inputFlags & IF_SSGR))
635 				i |= 0x80;
636 			    WRITE_1_P(SS2, i);
637 			}
638 			break;
639 		    case T_9494:
640 		    case T_9696:
641 			if (i >= 0x2020) {
642 			    if ((is->inputFlags & IF_EIGHTBIT) &&
643 				(is->inputFlags & IF_SSGR))
644 				i |= 0x8080;
645 			    WRITE_2_P(SS2, i);
646 			}
647 			break;
648 		    case T_94192:
649 			if (i >= 0x2020) {
650 			    if ((is->inputFlags & IF_EIGHTBIT) &&
651 				(is->inputFlags & IF_SSGR))
652 				i |= 0x8000;
653 			    WRITE_2_P(SS2, i);
654 			}
655 			break;
656 		    default:
657 			abort();
658 			/* NOTREACHED */
659 		    }
660 		    continue;
661 		}
662 	    }
663 	    if (is->inputFlags & IF_SS) {
664 		i = G3(is)->reverse(ucode, G3(is));
665 		switch (GR(is)->type) {
666 		case T_94:
667 		case T_96:
668 		case T_128:
669 		    if (i >= 0x20) {
670 			if ((is->inputFlags & IF_EIGHTBIT) &&
671 			    (is->inputFlags & IF_SSGR))
672 			    i |= 0x80;
673 			WRITE_1_P(SS3, i);
674 		    }
675 		    break;
676 		case T_9494:
677 		case T_9696:
678 		    if (i >= 0x2020) {
679 			if ((is->inputFlags & IF_EIGHTBIT) &&
680 			    (is->inputFlags & IF_SSGR))
681 			    i |= 0x8080;
682 			WRITE_2_P(SS3, i);
683 		    }
684 		    break;
685 		case T_94192:
686 		    if (i >= 0x2020) {
687 			if ((is->inputFlags & IF_EIGHTBIT) &&
688 			    (is->inputFlags & IF_SSGR))
689 			    i |= 0x8000;
690 			WRITE_2_P(SS3, i);
691 		    }
692 		    break;
693 		default:
694 		    abort();
695 		    /* NOTREACHED */
696 		}
697 		continue;
698 	    }
699 	    if (is->inputFlags & IF_LS) {
700 		i = GR(is)->reverse(ucode, GR(is));
701 		if (i >= 0) {
702 		    switch (GR(is)->type) {
703 		    case T_94:
704 		    case T_96:
705 		    case T_128:
706 			WRITE_1_P_S(LS1, i, LS0);
707 			break;
708 		    case T_9494:
709 		    case T_9696:
710 			WRITE_2_P_S(LS1, i, LS0);
711 			break;
712 		    case T_94192:
713 			WRITE_2_P_S(LS1, i, LS0);
714 			break;
715 		    default:
716 			abort();
717 			/* NOTREACHED */
718 		    }
719 		    continue;
720 		}
721 	    }
722 #undef WRITE_1
723 #undef WRITE_2
724 #undef WRITE_1_P
725 #undef WRITE_1_P_7bit
726 #undef WRITE_1_P_8bit
727 #undef WRITE_2_P
728 #undef WRITE_2_P_7bit
729 #undef WRITE_2_P_8bit
730 	}
731     }
732 }
733 
/* Combine the two bytes of a double-byte code, first byte in the high 8 bits */
#define PAIR(a,b) ((unsigned) ((a) << 8) | (b))

/*
 * Convert count bytes at buf from the encoding described by `is` to UTF-8
 * and write the result to fd.  When ilog is non-negative the raw input is
 * first written there.
 *
 * The bytes are run through a small state machine (is->parserState):
 *
 *   P_NORMAL  ordinary text.  ESC and CSI start buffering a control
 *             sequence; SS2, SS3, LS0 and LS1 are buffered and handed to
 *             terminate() at once; everything else is recoded through the
 *             charset chosen by the byte's high bit and the current shift
 *             state.  For double-byte charsets the first byte is parked in
 *             is->buffered_ku until the second one arrives.
 *   P_ESC     inside an escape sequence; bytes are buffered until a final
 *             byte arrives, then terminate() decides what to do with them.
 *   P_CSI     the same for a control sequence.
 *
 * Parser state, shift state and any incomplete sequence stay in `is`, so
 * input may be split anywhere between calls.  The output buffer is flushed
 * before returning.
 */
void
copyOut(Iso2022Ptr is, int fd, unsigned char *buf, unsigned count)
{
    unsigned char *s = buf;

    if (ilog >= 0)
	IGNORE_RC(write(ilog, buf, (size_t) count));

    while (s < buf + count) {
	switch (is->parserState) {
	case P_NORMAL:
	    /* re-entered when the second byte of a pair turns out invalid */
	  resynch:
	    if (is->buffered_ku < 0) {
		/* no first byte of a double-byte character is pending */
		if (*s == ESC) {
		    buffer(is, *s++);
		    is->parserState = P_ESC;
		} else if (OTHER(is) != NULL
			   && OTHER(is)->other_recode != NULL
			   && OTHER(is)->other_stack != NULL
			   && OTHER(is)->other_aux != NULL) {
		    /*
		     * Non-ISO-2022 encoding: each byte is fed to other_stack;
		     * a non-negative result is recoded and emitted.
		     */
		    int c = OTHER(is)->other_stack(*s, OTHER(is)->other_aux);
		    if (c >= 0) {
			unsigned ucode = (unsigned) c;
			outbufUTF8(is, fd,
				   OTHER(is)->other_recode(ucode, OTHER(is)->other_aux));
			is->shiftState = S_NORMAL;
		    }
		    s++;
		} else if (*s == CSI && CHARSET_REGULAR(GR(is))) {
		    buffer(is, *s++);
		    is->parserState = P_CSI;
		} else if ((*s == SS2 ||
			    *s == SS3 ||
			    *s == LS0 ||
			    *s == LS1) &&
			   CHARSET_REGULAR(GR(is))) {
		    /* one-byte shift function: complete as soon as seen */
		    buffer(is, *s++);
		    terminate(is, fd);
		    is->parserState = P_NORMAL;
		} else if (*s <= 0x20 && is->shiftState == S_NORMAL) {
		    /* Pass through C0 when GL is not regular */
		    outbufOne(is, fd, *s);
		    s++;
		} else {
		    const CharsetRec *charset;
		    unsigned char code = 0;
		    /*
		     * Choose the charset: a pending single shift selects G2
		     * or G3, otherwise the high bit selects GL or GR.
		     */
		    if (*s <= 0x7F) {
			switch (is->shiftState) {
			case S_NORMAL:
			    charset = GL(is);
			    break;
			case S_SS2:
			    charset = G2(is);
			    break;
			case S_SS3:
			    charset = G3(is);
			    break;
			default:
			    abort();
			    /* NOTREACHED */
			}
			code = *s;
		    } else {
			switch (is->shiftState) {
			case S_NORMAL:
			    charset = GR(is);
			    break;
			case S_SS2:
			    charset = G2(is);
			    break;
			case S_SS3:
			    charset = G3(is);
			    break;
			default:
			    abort();
			    /* NOTREACHED */
			}
			code = UChar(*s - 0x80);
		    }

		    /*
		     * Single-byte charsets are recoded right away; codes
		     * outside the charset's range are emitted as they came.
		     */
		    switch (charset->type) {
		    case T_94:
			if (code >= 0x21 && code <= 0x7E)
			    outbufUTF8(is, fd, charset->recode(code, charset));
			else
			    outbufUTF8(is, fd, *s);
			s++;
			is->shiftState = S_NORMAL;
			break;
		    case T_96:
			if (code >= 0x20)
			    outbufUTF8(is, fd, charset->recode(code, charset));
			else
			    outbufUTF8(is, fd, *s);
			is->shiftState = S_NORMAL;
			s++;
			break;
		    case T_128:
			outbufUTF8(is, fd, charset->recode(code, charset));
			is->shiftState = S_NORMAL;
			s++;
			break;
		    default:
			/* First byte of a multibyte sequence */
			is->buffered_ku = *s;
			s++;
		    }
		}
	    } else {		/* buffered_ku */
		const CharsetRec *charset;
		unsigned char ku_code;
		unsigned code = 0;
		/*
		 * The charset is chosen from the first byte.  `code` stays 0,
		 * which no range check below accepts, when the second byte
		 * does not have the same high bit as the first.
		 */
		if (is->buffered_ku <= 0x7F) {
		    switch (is->shiftState) {
		    case S_NORMAL:
			charset = GL(is);
			break;
		    case S_SS2:
			charset = G2(is);
			break;
		    case S_SS3:
			charset = G3(is);
			break;
		    default:
			abort();
			/* NOTREACHED */
		    }
		    ku_code = UChar(is->buffered_ku);
		    if (*s < 0x80)
			code = *s;
		} else {
		    switch (is->shiftState) {
		    case S_NORMAL:
			charset = GR(is);
			break;
		    case S_SS2:
			charset = G2(is);
			break;
		    case S_SS3:
			charset = G3(is);
			break;
		    default:
			abort();
			/* NOTREACHED */
		    }
		    ku_code = UChar(is->buffered_ku - 0x80);
		    if (*s >= 0x80)
			code = UChar(*s - 0x80);
		}
		/*
		 * A valid second byte completes the character.  An invalid one
		 * discards the pending first byte and is itself reprocessed
		 * from `resynch` (the goto skips the s++ that follows).
		 */
		switch (charset->type) {
		case T_94:
		case T_96:
		case T_128:
		    /* single-byte charsets never park a first byte */
		    abort();
		    /* NOTREACHED */
		    break;
		case T_9494:
		    if (code >= 0x21 && code <= 0x7E) {
			outbufUTF8(is, fd,
				   charset->recode(PAIR(ku_code, code), charset));
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
		    } else {
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
			goto resynch;
		    }
		    s++;
		    break;
		case T_9696:
		    if (code >= 0x20) {
			outbufUTF8(is, fd,
				   charset->recode(PAIR(ku_code, code), charset));
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
		    } else {
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
			goto resynch;
		    }
		    s++;
		    break;
		case T_94192:
		    /* Use *s, not code */
		    if (((*s >= 0x21) && (*s <= 0x7E)) ||
			((*s >= 0xA1) && (*s <= 0xFE))) {
			unsigned ucode = PAIR(ku_code, *s);
			outbufUTF8(is, fd,
				   charset->recode(ucode, charset));
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
		    } else {
			is->buffered_ku = -1;
			is->shiftState = S_NORMAL;
			goto resynch;
		    }
		    s++;
		    break;
		default:
		    abort();
		    /* NOTREACHED */
		}
	    }
	    break;
	case P_ESC:
	    assert(is->buffered_ku == -1);
	    if (*s == CSI_7) {
		/* ESC followed by CSI_7 starts a control sequence */
		buffer(is, *s++);
		is->parserState = P_CSI;
	    } else if (IS_FINAL_ESC(*s)) {
		buffer(is, *s++);
		terminate(is, fd);
		is->parserState = P_NORMAL;
	    } else {
		buffer(is, *s++);
	    }
	    break;
	case P_CSI:
	    if (IS_FINAL_CSI(*s)) {
		buffer(is, *s++);
		terminate(is, fd);
		is->parserState = P_NORMAL;
	    } else {
		buffer(is, *s++);
	    }
	    break;
	default:
	    abort();
	    /* NOTREACHED */
	}
    }
    outbuf_flush(is, fd);
}
969 
970 static void
terminate(Iso2022Ptr is,int fd)971 terminate(Iso2022Ptr is, int fd)
972 {
973     if (is->outputFlags & OF_PASSTHRU) {
974 	outbuf_buffered(is, fd);
975 	return;
976     }
977 
978     switch (is->buffered[0]) {
979     case SS2:
980 	if (is->outputFlags & OF_SS)
981 	    is->shiftState = S_SS2;
982 	discard_buffered(is);
983 	return;
984     case SS3:
985 	if (is->outputFlags & OF_SS)
986 	    is->shiftState = S_SS3;
987 	discard_buffered(is);
988 	return;
989     case LS0:
990 	if (is->outputFlags & OF_LS)
991 	    is->glp = &G0(is);
992 	discard_buffered(is);
993 	return;
994     case LS1:
995 	if (is->outputFlags & OF_LS)
996 	    is->glp = &G1(is);
997 	discard_buffered(is);
998 	return;
999     case ESC:
1000 	assert(is->buffered_count >= 2);
1001 	switch (is->buffered[1]) {
1002 	case SS2_7:
1003 	    if (is->outputFlags & OF_SS)
1004 		is->shiftState = S_SS2;
1005 	    discard_buffered(is);
1006 	    return;
1007 	case SS3_7:
1008 	    if (is->outputFlags & OF_SS)
1009 		is->shiftState = S_SS3;
1010 	    discard_buffered(is);
1011 	    return;
1012 	case LS2_7:
1013 	    if (is->outputFlags & OF_SS)
1014 		is->glp = &G2(is);
1015 	    discard_buffered(is);
1016 	    return;
1017 	case LS3_7:
1018 	    if (is->outputFlags & OF_LS)
1019 		is->glp = &G3(is);
1020 	    discard_buffered(is);
1021 	    return;
1022 	case LS1R_7:
1023 	    if (is->outputFlags & OF_LS)
1024 		is->grp = &G1(is);
1025 	    discard_buffered(is);
1026 	    return;
1027 	case LS2R_7:
1028 	    if (is->outputFlags & OF_LS)
1029 		is->grp = &G2(is);
1030 	    discard_buffered(is);
1031 	    return;
1032 	case LS3R_7:
1033 	    if (is->outputFlags & OF_LS)
1034 		is->grp = &G3(is);
1035 	    discard_buffered(is);
1036 	    return;
1037 	default:
1038 	    terminateEsc(is, fd,
1039 			 is->buffered + 1,
1040 			 (unsigned) (is->buffered_count - 1));
1041 	    break;
1042 	}
1043 	return;
1044     default:
1045 	outbuf_buffered(is, fd);
1046     }
1047 }
1048 
1049 static void
terminateEsc(Iso2022Ptr is,int fd,unsigned char * s_start,unsigned count)1050 terminateEsc(Iso2022Ptr is, int fd, unsigned char *s_start, unsigned count)
1051 {
1052     const CharsetRec *charset;
1053 
1054     /* ISO 2022 doesn't allow 2C, but Emacs/MULE uses it in 7-bit
1055        mode */
1056 
1057     if ((s_start[0] == 0x28 || s_start[0] == 0x29 ||
1058 	 s_start[0] == 0x2A || s_start[0] == 0x2B ||
1059 	 s_start[0] == 0x2C || s_start[0] == 0x2D ||
1060 	 s_start[0] == 0x2E || s_start[0] == 0x2F) &&
1061 	count >= 2) {
1062 	if (is->outputFlags & OF_SELECT) {
1063 	    if (s_start[0] <= 0x2B)
1064 		charset = getCharset(s_start[1], T_94);
1065 	    else
1066 		charset = getCharset(s_start[1], T_96);
1067 	    switch (s_start[0]) {
1068 	    case 0x28:
1069 	    case 0x2C:
1070 		G0(is) = charset;
1071 		break;
1072 	    case 0x29:
1073 	    case 0x2D:
1074 		G1(is) = charset;
1075 		break;
1076 	    case 0x2A:
1077 	    case 0x2E:
1078 		G2(is) = charset;
1079 		break;
1080 	    case 0x2B:
1081 	    case 0x2F:
1082 		G3(is) = charset;
1083 		break;
1084 	    }
1085 	}
1086 	discard_buffered(is);
1087     } else if (s_start[0] == 0x24 && count == 2) {
1088 	if (is->outputFlags & OF_SELECT) {
1089 	    charset = getCharset(s_start[1], T_9494);
1090 	    G0(is) = charset;
1091 	}
1092 	discard_buffered(is);
1093     } else if (s_start[0] == 0x24 && count >= 2 &&
1094 	       (s_start[1] == 0x28 || s_start[1] == 0x29 ||
1095 		s_start[1] == 0x2A || s_start[1] == 0x2B ||
1096 		s_start[1] == 0x2D || s_start[1] == 0x2E ||
1097 		s_start[1] == 0x2F) &&
1098 	       count >= 3) {
1099 	if (is->outputFlags & OF_SELECT) {
1100 	    if (s_start[1] <= 0x2B)
1101 		charset = getCharset(s_start[2], T_9494);
1102 	    else
1103 		charset = getCharset(s_start[2], T_9696);
1104 	    switch (s_start[1]) {
1105 	    case 0x28:
1106 		G0(is) = charset;
1107 		break;
1108 	    case 0x29:
1109 	    case 0x2D:
1110 		G1(is) = charset;
1111 		break;
1112 	    case 0x2A:
1113 	    case 0x2E:
1114 		G2(is) = charset;
1115 		break;
1116 	    case 0x2B:
1117 	    case 0x2F:
1118 		G3(is) = charset;
1119 		break;
1120 	    }
1121 	}
1122 	discard_buffered(is);
1123     } else
1124 	outbuf_buffered(is, fd);
1125 }
1126 
1127 #ifdef NO_LEAKS
/*
 * Leak-checking hook, compiled only with NO_LEAKS.  The body is empty:
 * nothing is released here.
 */
void
iso2022_leaks(void)
{
}
1132 #endif
1133