1 /* $XTermId: iso2022.c,v 1.40 2018/06/27 20:41:53 tom Exp $ */
2
3 /*
4 Copyright 2011-2013,2018 by Thomas E. Dickey
5 Copyright (c) 2001 by Juliusz Chroboczek
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 THE SOFTWARE.
24 */
25
26 #include <iso2022.h>
27
28 #include <unistd.h>
29 #include <errno.h>
30
31 #include <sys.h>
32
33 #define BUFFERED_INPUT_SIZE 4
34 static unsigned char buffered_input[BUFFERED_INPUT_SIZE];
35 static int buffered_input_count = 0;
36
37 static void terminateEsc(Iso2022Ptr, int, unsigned char *, unsigned);
38 static void terminate(Iso2022Ptr, int);
39
40 #define OUTBUF_FREE(is, count) ((is)->outbuf_count + (count) <= BUFFER_SIZE)
41 #define OUTBUF_MAKE_FREE(is, fd, count) \
42 if(!OUTBUF_FREE((is), (count))) outbuf_flush((is), (fd))
43
44 #ifdef OPT_TRACE
45 static void
trace_charset(const char * tag,const CharsetRec * ptr)46 trace_charset(const char *tag, const CharsetRec * ptr)
47 {
48 if (ptr != NULL) {
49 TRACE(("%s:", NonNull(tag)));
50 TRACE((" name:%s", NonNull(ptr->name)));
51 TRACE((" type:%d", ptr->type));
52 if (ptr->final)
53 TRACE((" final:%c", ptr->final));
54 if (ptr->data != NULL)
55 TRACE((" data"));
56 if (ptr->recode != NULL)
57 TRACE((" recode"));
58 if (ptr->reverse != NULL)
59 TRACE((" reverse"));
60 if (ptr->other_stack != NULL)
61 TRACE((" other_stack"));
62 if (ptr->other_aux != NULL)
63 TRACE((" other_aux"));
64 if (ptr->other_recode != NULL)
65 TRACE((" other_recode"));
66 if (ptr->other_reverse != NULL)
67 TRACE((" other_reverse"));
68 TRACE(("\n"));
69 }
70 }
71
72 static void
trace_iso2022(const char * tag,const Iso2022Ptr ptr)73 trace_iso2022(const char *tag, const Iso2022Ptr ptr)
74 {
75 TRACE(("%s:\n", NonNull(tag)));
76 trace_charset("\tGL()", GL(ptr));
77 trace_charset("\tGR()", GR(ptr));
78 trace_charset("\tG0()", G0(ptr));
79 trace_charset("\tG1()", G1(ptr));
80 trace_charset("\tG2()", G2(ptr));
81 trace_charset("\tG3()", G3(ptr));
82 trace_charset("\tOTHER()", OTHER(ptr));
83 }
84
85 #else
86 #define trace_iso2022(tag, ptr) /* nothing */
87 #endif
88
89 static void
outbuf_flush(Iso2022Ptr is,int fd)90 outbuf_flush(Iso2022Ptr is, int fd)
91 {
92 int rc;
93 unsigned i = 0;
94
95 if (olog >= 0)
96 IGNORE_RC(write(olog, is->outbuf, is->outbuf_count));
97
98 while (i < is->outbuf_count) {
99 rc = (int) write(fd, is->outbuf + i, is->outbuf_count - i);
100 if (rc > 0) {
101 i += (unsigned) rc;
102 } else {
103 if (rc < 0 && errno == EINTR)
104 continue;
105 else if ((rc == 0) || ((rc < 0) && (errno == EAGAIN))) {
106 if (waitForOutput(fd) == IO_Closed)
107 break;
108 continue;
109 } else
110 break;
111 }
112 }
113 is->outbuf_count = 0;
114 }
115
116 static void
outbufOne(Iso2022Ptr is,int fd,unsigned c)117 outbufOne(Iso2022Ptr is, int fd, unsigned c)
118 {
119 OUTBUF_MAKE_FREE(is, fd, 1);
120 is->outbuf[is->outbuf_count++] = UChar(c);
121 }
122
123 /* Discards null codepoints */
124 static void
outbufUTF8(Iso2022Ptr is,int fd,unsigned c)125 outbufUTF8(Iso2022Ptr is, int fd, unsigned c)
126 {
127 if (c == 0)
128 return;
129
130 if (c <= 0x7F) {
131 OUTBUF_MAKE_FREE(is, fd, 1);
132 is->outbuf[is->outbuf_count++] = UChar(c);
133 } else if (c <= 0x7FF) {
134 OUTBUF_MAKE_FREE(is, fd, 2);
135 is->outbuf[is->outbuf_count++] = UChar(0xC0 | ((c >> 6) & 0x1F));
136 is->outbuf[is->outbuf_count++] = UChar(0x80 | (c & 0x3F));
137 } else {
138 OUTBUF_MAKE_FREE(is, fd, 3);
139 is->outbuf[is->outbuf_count++] = UChar(0xE0 | ((c >> 12) & 0x0F));
140 is->outbuf[is->outbuf_count++] = UChar(0x80 | ((c >> 6) & 0x3F));
141 is->outbuf[is->outbuf_count++] = UChar(0x80 | (c & 0x3F));
142 }
143 }
144
145 static void
buffer(Iso2022Ptr is,unsigned c)146 buffer(Iso2022Ptr is, unsigned c)
147 {
148 if (is->buffered == NULL) {
149 is->buffered_len = 10;
150 is->buffered = malloc(is->buffered_len);
151 if (is->buffered == NULL)
152 FatalError("Couldn't allocate buffered.\n");
153 }
154
155 if (is->buffered_count >= is->buffered_len) {
156 is->buffered = realloc(is->buffered, 2 * is->buffered_len + 1);
157 if (is->buffered == NULL) {
158 FatalError("Couldn't grow buffered.\n");
159 }
160 is->buffered_len = 2 * is->buffered_len + 1;
161 }
162
163 is->buffered[is->buffered_count++] = UChar(c);
164 }
165
166 static void
outbuf_buffered_carefully(Iso2022Ptr is,int fd)167 outbuf_buffered_carefully(Iso2022Ptr is, int fd)
168 {
169 /* This should never happen in practice */
170 unsigned i = 0;
171
172 while (i < is->buffered_count) {
173 OUTBUF_MAKE_FREE(is, fd, 1);
174 is->outbuf[is->outbuf_count++] = is->buffered[i++];
175 }
176 is->buffered_count = 0;
177 }
178
179 static void
outbuf_buffered(Iso2022Ptr is,int fd)180 outbuf_buffered(Iso2022Ptr is, int fd)
181 {
182 if (is->buffered_count > BUFFER_SIZE)
183 outbuf_buffered_carefully(is, fd);
184
185 OUTBUF_MAKE_FREE(is, fd, is->buffered_count);
186 memcpy(is->outbuf + is->outbuf_count, is->buffered, is->buffered_count);
187 is->outbuf_count += is->buffered_count;
188 is->buffered_count = 0;
189 }
190
191 static void
discard_buffered(Iso2022Ptr is)192 discard_buffered(Iso2022Ptr is)
193 {
194 is->buffered_count = 0;
195 }
196
197 Iso2022Ptr
allocIso2022(void)198 allocIso2022(void)
199 {
200 Iso2022Ptr is;
201 is = TypeCalloc(Iso2022Rec);
202 if (!is)
203 return NULL;
204 is->glp = is->grp = NULL;
205 G0(is) = G1(is) = G2(is) = G3(is) = OTHER(is) = NULL;
206
207 is->parserState = P_NORMAL;
208 is->shiftState = S_NORMAL;
209
210 is->inputFlags = IF_EIGHTBIT | IF_SS | IF_SSGR;
211 is->outputFlags = OF_SS | OF_LS | OF_SELECT;
212
213 is->buffered = NULL;
214 is->buffered_len = 0;
215 is->buffered_count = 0;
216
217 is->buffered_ku = -1;
218
219 is->outbuf = malloc((size_t) BUFFER_SIZE);
220 if (!is->outbuf) {
221 free(is);
222 return NULL;
223 }
224 is->outbuf_count = 0;
225
226 return is;
227 }
228
229 #ifdef NO_LEAKS
230 void
destroyIso2022(Iso2022Ptr is)231 destroyIso2022(Iso2022Ptr is)
232 {
233 if (is->buffered)
234 free(is->buffered);
235 if (is->outbuf)
236 free(is->outbuf);
237 free(is);
238 }
239 #endif
240
241 static int
identifyCharset(Iso2022Ptr i,const CharsetRec ** p)242 identifyCharset(Iso2022Ptr i, const CharsetRec * *p)
243 {
244 if (p == &G0(i)) {
245 return 0;
246 } else if (p == &G1(i)) {
247 return 1;
248 } else if (p == &G2(i)) {
249 return 2;
250 } else if (p == &G3(i)) {
251 return 3;
252 } else {
253 abort();
254 /* NOTREACHED */
255 }
256 }
257
258 #define G_name(n) ((i != 0 && i->g[n] != 0) ? NonNull(i->g[n]->name) : "unset")
259
260 void
reportIso2022(const char * tag,Iso2022Ptr i)261 reportIso2022(const char *tag, Iso2022Ptr i)
262 {
263 Message("%s: ", tag);
264 if (OTHER(i) != NULL) {
265 Message("%s, non-ISO-2022 encoding.\n", OTHER(i)->name);
266 return;
267 }
268 Message("G0 is %s, ", G_name(0));
269 Message("G1 is %s, ", G_name(1));
270 Message("G2 is %s, ", G_name(2));
271 Message("G3 is %s.\n", G_name(3));
272 Message("GL is G%d, ", identifyCharset(i, i->glp));
273 Message("GR is G%d.\n", identifyCharset(i, i->grp));
274 }
275
276 int
initIso2022(const char * locale,const char * charset,Iso2022Ptr i)277 initIso2022(const char *locale, const char *charset, Iso2022Ptr i)
278 {
279 int gl = 0, gr = 2;
280 const CharsetRec *g0 = NULL;
281 const CharsetRec *g1 = NULL;
282 const CharsetRec *g2 = NULL;
283 const CharsetRec *g3 = NULL;
284 const CharsetRec *other = NULL;
285 int rc;
286
287 TRACE(("initIso2022(locale=%s, charset=%s)\n", NonNull(locale), NonNull(charset)));
288 rc = getLocaleState(locale, charset, &gl, &gr, &g0, &g1, &g2, &g3, &other);
289 if (rc < 0) {
290 if (charset) {
291 Warning("couldn't find charset %s; "
292 "using ISO 8859-1.\n", charset);
293 } else if (ignore_locale) {
294 Warning("couldn't find charset data for %s; "
295 "using ISO 8859-1.\n", locale);
296 } else {
297 Warning("couldn't find charset data for locale %s; "
298 "using ISO 8859-1.\n", locale);
299 }
300 }
301
302 if (G0(i) == NULL) {
303 if (g0)
304 G0(i) = g0;
305 else
306 G0(i) = getCharsetByName("ASCII");
307 }
308
309 if (G1(i) == NULL) {
310 if (g1)
311 G1(i) = g1;
312 else
313 G1(i) = getUnknownCharset(T_94);
314 }
315
316 if (G2(i) == NULL) {
317 if (g2)
318 G2(i) = g2;
319 else
320 G2(i) = getCharsetByName("ISO 8859-1");
321 }
322
323 if (G3(i) == NULL) {
324 if (g3)
325 G3(i) = g3;
326 else
327 G3(i) = getUnknownCharset(T_94);
328 }
329
330 if (OTHER(i) == NULL) {
331 if (other)
332 OTHER(i) = other;
333 else
334 OTHER(i) = NULL;
335 }
336
337 if (i->glp == NULL) {
338 i->glp = &i->g[gl];
339 }
340
341 if (i->grp == NULL) {
342 i->grp = &i->g[gr];
343 }
344 trace_iso2022("...initIso2022", i);
345 return 0;
346 }
347
348 int
mergeIso2022(Iso2022Ptr d,Iso2022Ptr s)349 mergeIso2022(Iso2022Ptr d, Iso2022Ptr s)
350 {
351 if (G0(d) == NULL)
352 G0(d) = G0(s);
353 if (G1(d) == NULL)
354 G1(d) = G1(s);
355 if (G2(d) == NULL)
356 G2(d) = G2(s);
357 if (G3(d) == NULL)
358 G3(d) = G3(s);
359 if (OTHER(d) == NULL)
360 OTHER(d) = OTHER(s);
361 if (d->glp == NULL)
362 d->glp = &(d->g[identifyCharset(s, s->glp)]);
363 if (d->grp == NULL)
364 d->grp = &(d->g[identifyCharset(s, s->grp)]);
365 trace_iso2022("...mergeIso2022", d);
366 return 0;
367 }
368
/*
 * Length in bytes of the UTF-8 sequence introduced by lead byte "c",
 * judged from its high-order bits:
 *
 *   110xxxxx -> 2,  1110xxxx -> 3,  11110xxx -> 4,  anything else -> 1
 *
 * "Anything else" covers ASCII as well as bytes that cannot start a
 * sequence (continuation bytes 10xxxxxx and 11111xxx).  No result exceeds
 * BUFFERED_INPUT_SIZE.
 */
static int
utf8Count(unsigned c)
{
    if ((c & 0xE0) == 0xC0)
        return 2;
    if ((c & 0xF0) == 0xE0)
        return 3;
    if ((c & 0xF8) == 0xF0)
        return 4;

    /* ASCII, a stray continuation byte, or an invalid lead byte */
    return 1;
}
386
/*
 * Decode the UTF-8 sequence starting at "b" into a code point.
 *
 * The lead byte b[0] decides how many of b[1]..b[3] are read (none to
 * three); the caller must supply that many bytes.  Only the payload bits of
 * the trailing bytes are used; they are not checked for the 10xxxxxx form.
 *
 * Returns the code point, or -1 when b[0] cannot start a sequence (a
 * continuation byte, or a lead byte of the form 11111xxx).
 */
static int
fromUtf8(const unsigned char *b)
{
    const unsigned lead = b[0];

    if ((lead & 0x80) == 0)
        return (int) lead;

    if ((lead & 0x40) == 0)
        return -1;              /* continuation byte: incorrect UTF-8 */

    if ((lead & 0x20) == 0)     /* 110xxxxx */
        return (int) (((lead & 0x1F) << 6) |
                      (b[1] & 0x3Fu));

    if ((lead & 0x10) == 0)     /* 1110xxxx */
        return (int) (((lead & 0x0F) << 12) |
                      ((b[1] & 0x3Fu) << 6) |
                      (b[2] & 0x3Fu));

    if ((lead & 0x08) == 0)     /* 11110xxx: three payload bits, not two */
        return (int) (((lead & 0x07) << 18) |
                      ((b[1] & 0x3Fu) << 12) |
                      ((b[2] & 0x3Fu) << 6) |
                      (b[3] & 0x3Fu));

    return -1;
}
408
409 void
copyIn(Iso2022Ptr is,int fd,unsigned char * buf,int count)410 copyIn(Iso2022Ptr is, int fd, unsigned char *buf, int count)
411 {
412 unsigned char *c;
413 int codepoint, rem;
414
415 c = buf;
416 rem = count;
417
418 #define NEXT do {c++; rem--;} while(0)
419
420 while (rem > 0) {
421 codepoint = -1;
422 if (is->parserState == P_ESC) {
423 assert(buffered_input_count == 0);
424 codepoint = *c;
425 NEXT;
426 if (*c == CSI_7)
427 is->parserState = P_CSI;
428 else if (IS_FINAL_ESC(codepoint))
429 is->parserState = P_NORMAL;
430 } else if (is->parserState == P_CSI) {
431 assert(buffered_input_count == 0);
432 codepoint = *c;
433 NEXT;
434 if (IS_FINAL_CSI(codepoint))
435 is->parserState = P_NORMAL;
436 } else if (!(*c & 0x80)) {
437 if (buffered_input_count > 0) {
438 buffered_input_count = 0;
439 continue;
440 } else {
441 codepoint = *c;
442 NEXT;
443 if (codepoint == ESC)
444 is->parserState = P_ESC;
445 }
446 } else if ((*c & 0x40)) {
447 if (buffered_input_count > 0) {
448 buffered_input_count = 0;
449 continue;
450 } else {
451 buffered_input[buffered_input_count] = *c;
452 buffered_input_count++;
453 NEXT;
454 }
455 } else {
456 if (buffered_input_count <= 0) {
457 buffered_input_count = 0;
458 NEXT;
459 continue;
460 } else {
461 buffered_input[buffered_input_count] = *c;
462 buffered_input_count++;
463 NEXT;
464 if (buffered_input_count >= utf8Count(buffered_input[0])) {
465 codepoint = fromUtf8(buffered_input);
466 buffered_input_count = 0;
467 if (codepoint == CSI)
468 is->parserState = P_CSI;
469 }
470 }
471 }
472 #undef NEXT
473
474 if (codepoint >= 0) {
475 int i;
476 unsigned ucode = (unsigned) codepoint;
477 unsigned char obuf[4];
478
479 #define WRITE_1(i) do { \
480 obuf[0] = UChar(i); \
481 IGNORE_RC(write(fd, obuf, (size_t) 1)); \
482 } while(0)
483 #define WRITE_2(i) do { \
484 obuf[0] = UChar(((i) >> 8) & 0xFF); \
485 obuf[1] = UChar((i) & 0xFF); \
486 IGNORE_RC(write(fd, obuf, (size_t) 2)); \
487 } while(0)
488
489 #define WRITE_3(i) do { \
490 obuf[0] = UChar(((i) >> 16) & 0xFF); \
491 obuf[1] = UChar(((i) >> 8) & 0xFF); \
492 obuf[2] = UChar((i) & 0xFF); \
493 IGNORE_RC(write(fd, obuf, (size_t) 3)); \
494 } while(0)
495
496 #define WRITE_4(i) do { \
497 obuf[0] = UChar(((i) >> 24) & 0xFF); \
498 obuf[1] = UChar(((i) >> 16) & 0xFF); \
499 obuf[2] = UChar(((i) >> 8) & 0xFF); \
500 obuf[3] = UChar((i) & 0xFF); \
501 IGNORE_RC(write(fd, obuf, (size_t) 4)); \
502 } while(0)
503
504 #define WRITE_1_P_8bit(p, i) { \
505 obuf[0] = UChar(p); \
506 obuf[1] = UChar(i); \
507 IGNORE_RC(write(fd, obuf, (size_t) 2)); \
508 }
509
510 #define WRITE_1_P_7bit(p, i) { \
511 obuf[0] = ESC; \
512 obuf[1] = UChar((p) - 0x40); \
513 obuf[2] = UChar(i); \
514 IGNORE_RC(write(fd, obuf, (size_t) 3)); \
515 }
516
517 #define WRITE_1_P(p,i) do { \
518 if(is->inputFlags & IF_EIGHTBIT) \
519 WRITE_1_P_8bit(p,i) else \
520 WRITE_1_P_7bit(p,i) \
521 } while(0)
522
523 #define WRITE_2_P_8bit(p, i) { \
524 obuf[0] = UChar(p); \
525 obuf[1] = UChar(((i) >> 8) & 0xFF); \
526 obuf[2] = UChar((i) & 0xFF); \
527 IGNORE_RC(write(fd, obuf, (size_t) 3)); \
528 }
529
530 #define WRITE_2_P_7bit(p, i) { \
531 obuf[0] = ESC; \
532 obuf[1] = UChar((p) - 0x40); \
533 obuf[2] = UChar(((i) >> 8) & 0xFF); \
534 obuf[3] = UChar((i) & 0xFF); \
535 IGNORE_RC(write(fd, obuf, (size_t) 4)); \
536 }
537
538 #define WRITE_2_P(p,i) do { \
539 if(is->inputFlags & IF_EIGHTBIT) \
540 WRITE_2_P_8bit(p,i) \
541 else \
542 WRITE_2_P_7bit(p,i) \
543 } while(0)
544
545 #define WRITE_1_P_S(p,i,s) do { \
546 obuf[0] = UChar(p); \
547 obuf[1] = UChar((i) & 0xFF); \
548 obuf[2] = UChar(s); \
549 IGNORE_RC(write(fd, obuf, (size_t) 3)); \
550 } while(0)
551
552 #define WRITE_2_P_S(p,i,s) do { \
553 obuf[0] = UChar(p); \
554 obuf[1] = UChar(((i) >> 8) & 0xFF); \
555 obuf[2] = UChar((i) & 0xFF); \
556 obuf[3] = UChar(s); \
557 IGNORE_RC(write(fd, obuf, (size_t) 4)); \
558 } while(0)
559
560 if (ucode < 0x20 ||
561 (OTHER(is) == NULL && CHARSET_REGULAR(GR(is)) &&
562 (ucode >= 0x80 && ucode < 0xA0))) {
563 WRITE_1(ucode);
564 continue;
565 }
566 if (OTHER(is) != NULL
567 && OTHER(is)->other_reverse != NULL) {
568 unsigned int c2;
569 c2 = OTHER(is)->other_reverse(ucode, OTHER(is)->other_aux);
570 if (c2 >> 24)
571 WRITE_4(c2);
572 else if (c2 >> 16)
573 WRITE_3(c2);
574 else if (c2 >> 8)
575 WRITE_2(c2);
576 else if (c2)
577 WRITE_1(c2);
578 continue;
579 }
580 i = (GL(is)->reverse) (ucode, GL(is));
581 if (i >= 0) {
582 switch (GL(is)->type) {
583 case T_94:
584 case T_96:
585 case T_128:
586 if (i >= 0x20)
587 WRITE_1(i);
588 break;
589 case T_9494:
590 case T_9696:
591 case T_94192:
592 if (i >= 0x2020)
593 WRITE_2(i);
594 break;
595 default:
596 abort();
597 /* NOTREACHED */
598 }
599 continue;
600 }
601 if (is->inputFlags & IF_EIGHTBIT) {
602 i = GR(is)->reverse(ucode, GR(is));
603 if (i >= 0) {
604 switch (GR(is)->type) {
605 case T_94:
606 case T_96:
607 case T_128:
608 /* we allow C1 characters if T_128 in GR */
609 WRITE_1(i | 0x80);
610 break;
611 case T_9494:
612 case T_9696:
613 WRITE_2(i | 0x8080);
614 break;
615 case T_94192:
616 WRITE_2(i | 0x8000);
617 break;
618 default:
619 abort();
620 /* NOTREACHED */
621 }
622 continue;
623 }
624 }
625 if (is->inputFlags & IF_SS) {
626 i = G2(is)->reverse(ucode, G2(is));
627 if (i >= 0) {
628 switch (GR(is)->type) {
629 case T_94:
630 case T_96:
631 case T_128:
632 if (i >= 0x20) {
633 if ((is->inputFlags & IF_EIGHTBIT) &&
634 (is->inputFlags & IF_SSGR))
635 i |= 0x80;
636 WRITE_1_P(SS2, i);
637 }
638 break;
639 case T_9494:
640 case T_9696:
641 if (i >= 0x2020) {
642 if ((is->inputFlags & IF_EIGHTBIT) &&
643 (is->inputFlags & IF_SSGR))
644 i |= 0x8080;
645 WRITE_2_P(SS2, i);
646 }
647 break;
648 case T_94192:
649 if (i >= 0x2020) {
650 if ((is->inputFlags & IF_EIGHTBIT) &&
651 (is->inputFlags & IF_SSGR))
652 i |= 0x8000;
653 WRITE_2_P(SS2, i);
654 }
655 break;
656 default:
657 abort();
658 /* NOTREACHED */
659 }
660 continue;
661 }
662 }
663 if (is->inputFlags & IF_SS) {
664 i = G3(is)->reverse(ucode, G3(is));
665 switch (GR(is)->type) {
666 case T_94:
667 case T_96:
668 case T_128:
669 if (i >= 0x20) {
670 if ((is->inputFlags & IF_EIGHTBIT) &&
671 (is->inputFlags & IF_SSGR))
672 i |= 0x80;
673 WRITE_1_P(SS3, i);
674 }
675 break;
676 case T_9494:
677 case T_9696:
678 if (i >= 0x2020) {
679 if ((is->inputFlags & IF_EIGHTBIT) &&
680 (is->inputFlags & IF_SSGR))
681 i |= 0x8080;
682 WRITE_2_P(SS3, i);
683 }
684 break;
685 case T_94192:
686 if (i >= 0x2020) {
687 if ((is->inputFlags & IF_EIGHTBIT) &&
688 (is->inputFlags & IF_SSGR))
689 i |= 0x8000;
690 WRITE_2_P(SS3, i);
691 }
692 break;
693 default:
694 abort();
695 /* NOTREACHED */
696 }
697 continue;
698 }
699 if (is->inputFlags & IF_LS) {
700 i = GR(is)->reverse(ucode, GR(is));
701 if (i >= 0) {
702 switch (GR(is)->type) {
703 case T_94:
704 case T_96:
705 case T_128:
706 WRITE_1_P_S(LS1, i, LS0);
707 break;
708 case T_9494:
709 case T_9696:
710 WRITE_2_P_S(LS1, i, LS0);
711 break;
712 case T_94192:
713 WRITE_2_P_S(LS1, i, LS0);
714 break;
715 default:
716 abort();
717 /* NOTREACHED */
718 }
719 continue;
720 }
721 }
722 #undef WRITE_1
723 #undef WRITE_2
724 #undef WRITE_1_P
725 #undef WRITE_1_P_7bit
726 #undef WRITE_1_P_8bit
727 #undef WRITE_2_P
728 #undef WRITE_2_P_7bit
729 #undef WRITE_2_P_8bit
730 }
731 }
732 }
733
/* Combine the two bytes of a double-byte character code, first byte high. */
#define PAIR(a,b) ((unsigned) ((a) << 8) | (b))

/*
 * Convert "count" bytes in "buf", encoded as described by "is", to UTF-8 and
 * write the result to "fd".
 *
 * The raw input is first copied to the "ilog" descriptor when that is open
 * (>= 0).  The bytes then drive a small state machine kept in "is", so that
 * control sequences and double-byte characters may be split across calls:
 *
 *   P_NORMAL  ordinary text.  ESC and CSI start a buffered sequence; the
 *             shift codes SS2, SS3, LS0 and LS1 are handed to terminate()
 *             at once; everything else is recoded through the "other"
 *             charset, or through the charset selected by the byte's high
 *             bit and the current single-shift state.
 *   P_ESC     inside an escape sequence; bytes are buffered until a final
 *             byte arrives, then terminate() decides what to do with them.
 *   P_CSI     inside a control sequence; handled like P_ESC with its own
 *             test for the final byte.
 *
 * The first byte of a double-byte character is parked in is->buffered_ku
 * until the second byte arrives.  The output buffer is flushed before
 * returning.
 */
void
copyOut(Iso2022Ptr is, int fd, unsigned char *buf, unsigned count)
{
    unsigned char *s = buf;

    if (ilog >= 0)
        IGNORE_RC(write(ilog, buf, (size_t) count));

    while (s < buf + count) {
        switch (is->parserState) {
        case P_NORMAL:
            /*
             * Re-entry point after an invalid second byte: the parked first
             * byte has been dropped and *s is examined again from scratch.
             */
          resynch:
            if (is->buffered_ku < 0) {
                /* no first byte of a double-byte character is pending */
                if (*s == ESC) {
                    buffer(is, *s++);
                    is->parserState = P_ESC;
                } else if (OTHER(is) != NULL
                           && OTHER(is)->other_recode != NULL
                           && OTHER(is)->other_stack != NULL
                           && OTHER(is)->other_aux != NULL) {
                    /*
                     * Non-ISO-2022 encoding: other_stack() is fed one byte
                     * at a time, and a non-negative result is recoded and
                     * written out.
                     */
                    int c = OTHER(is)->other_stack(*s, OTHER(is)->other_aux);
                    if (c >= 0) {
                        unsigned ucode = (unsigned) c;
                        outbufUTF8(is, fd,
                                   OTHER(is)->other_recode(ucode, OTHER(is)->other_aux));
                        is->shiftState = S_NORMAL;
                    }
                    s++;
                } else if (*s == CSI && CHARSET_REGULAR(GR(is))) {
                    buffer(is, *s++);
                    is->parserState = P_CSI;
                } else if ((*s == SS2 ||
                            *s == SS3 ||
                            *s == LS0 ||
                            *s == LS1) &&
                           CHARSET_REGULAR(GR(is))) {
                    /* a one-byte shift function is complete immediately */
                    buffer(is, *s++);
                    terminate(is, fd);
                    is->parserState = P_NORMAL;
                } else if (*s <= 0x20 && is->shiftState == S_NORMAL) {
                    /*
                     * Control characters and space are copied unchanged
                     * when no single shift is pending.
                     */
                    outbufOne(is, fd, *s);
                    s++;
                } else {
                    const CharsetRec *charset;
                    unsigned char code = 0;
                    if (*s <= 0x7F) {
                        /* low half: GL, unless a single shift is pending */
                        switch (is->shiftState) {
                        case S_NORMAL:
                            charset = GL(is);
                            break;
                        case S_SS2:
                            charset = G2(is);
                            break;
                        case S_SS3:
                            charset = G3(is);
                            break;
                        default:
                            abort();
                            /* NOTREACHED */
                        }
                        code = *s;
                    } else {
                        /* high half: GR, unless a single shift is pending */
                        switch (is->shiftState) {
                        case S_NORMAL:
                            charset = GR(is);
                            break;
                        case S_SS2:
                            charset = G2(is);
                            break;
                        case S_SS3:
                            charset = G3(is);
                            break;
                        default:
                            abort();
                            /* NOTREACHED */
                        }
                        code = UChar(*s - 0x80);
                    }

                    switch (charset->type) {
                    case T_94:
                        /* bytes outside 0x21..0x7E are not in a 94-set */
                        if (code >= 0x21 && code <= 0x7E)
                            outbufUTF8(is, fd, charset->recode(code, charset));
                        else
                            outbufUTF8(is, fd, *s);
                        s++;
                        is->shiftState = S_NORMAL;
                        break;
                    case T_96:
                        if (code >= 0x20)
                            outbufUTF8(is, fd, charset->recode(code, charset));
                        else
                            outbufUTF8(is, fd, *s);
                        is->shiftState = S_NORMAL;
                        s++;
                        break;
                    case T_128:
                        outbufUTF8(is, fd, charset->recode(code, charset));
                        is->shiftState = S_NORMAL;
                        s++;
                        break;
                    default:
                        /* First byte of a multibyte sequence */
                        is->buffered_ku = *s;
                        s++;
                    }
                }
            } else {            /* buffered_ku */
                /*
                 * *s is the second byte of a double-byte character whose
                 * first byte is parked in buffered_ku.  The charset is
                 * chosen from the first byte, as it was when that byte
                 * arrived.
                 */
                const CharsetRec *charset;
                unsigned char ku_code;
                unsigned code = 0;
                if (is->buffered_ku <= 0x7F) {
                    switch (is->shiftState) {
                    case S_NORMAL:
                        charset = GL(is);
                        break;
                    case S_SS2:
                        charset = G2(is);
                        break;
                    case S_SS3:
                        charset = G3(is);
                        break;
                    default:
                        abort();
                        /* NOTREACHED */
                    }
                    ku_code = UChar(is->buffered_ku);
                    /*
                     * A second byte from the other half leaves code at 0,
                     * which fails the range tests below.
                     */
                    if (*s < 0x80)
                        code = *s;
                } else {
                    switch (is->shiftState) {
                    case S_NORMAL:
                        charset = GR(is);
                        break;
                    case S_SS2:
                        charset = G2(is);
                        break;
                    case S_SS3:
                        charset = G3(is);
                        break;
                    default:
                        abort();
                        /* NOTREACHED */
                    }
                    ku_code = UChar(is->buffered_ku - 0x80);
                    if (*s >= 0x80)
                        code = UChar(*s - 0x80);
                }
                switch (charset->type) {
                case T_94:
                case T_96:
                case T_128:
                    /* a first byte is only parked for double-byte sets */
                    abort();
                    /* NOTREACHED */
                    break;
                case T_9494:
                    if (code >= 0x21 && code <= 0x7E) {
                        outbufUTF8(is, fd,
                                   charset->recode(PAIR(ku_code, code), charset));
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                    } else {
                        /* invalid second byte: drop the first, rescan *s */
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                        goto resynch;
                    }
                    s++;
                    break;
                case T_9696:
                    if (code >= 0x20) {
                        outbufUTF8(is, fd,
                                   charset->recode(PAIR(ku_code, code), charset));
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                    } else {
                        /* invalid second byte: drop the first, rescan *s */
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                        goto resynch;
                    }
                    s++;
                    break;
                case T_94192:
                    /* Use *s, not code */
                    if (((*s >= 0x21) && (*s <= 0x7E)) ||
                        ((*s >= 0xA1) && (*s <= 0xFE))) {
                        unsigned ucode = PAIR(ku_code, *s);
                        outbufUTF8(is, fd,
                                   charset->recode(ucode, charset));
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                    } else {
                        /* invalid second byte: drop the first, rescan *s */
                        is->buffered_ku = -1;
                        is->shiftState = S_NORMAL;
                        goto resynch;
                    }
                    s++;
                    break;
                default:
                    abort();
                    /* NOTREACHED */
                }
            }
            break;
        case P_ESC:
            assert(is->buffered_ku == -1);
            if (*s == CSI_7) {
                /* ESC followed by the 7-bit CSI byte starts a control sequence */
                buffer(is, *s++);
                is->parserState = P_CSI;
            } else if (IS_FINAL_ESC(*s)) {
                buffer(is, *s++);
                terminate(is, fd);
                is->parserState = P_NORMAL;
            } else {
                /* intermediate byte: keep collecting */
                buffer(is, *s++);
            }
            break;
        case P_CSI:
            if (IS_FINAL_CSI(*s)) {
                buffer(is, *s++);
                terminate(is, fd);
                is->parserState = P_NORMAL;
            } else {
                /* parameter or intermediate byte: keep collecting */
                buffer(is, *s++);
            }
            break;
        default:
            abort();
            /* NOTREACHED */
        }
    }
    outbuf_flush(is, fd);
}
969
970 static void
terminate(Iso2022Ptr is,int fd)971 terminate(Iso2022Ptr is, int fd)
972 {
973 if (is->outputFlags & OF_PASSTHRU) {
974 outbuf_buffered(is, fd);
975 return;
976 }
977
978 switch (is->buffered[0]) {
979 case SS2:
980 if (is->outputFlags & OF_SS)
981 is->shiftState = S_SS2;
982 discard_buffered(is);
983 return;
984 case SS3:
985 if (is->outputFlags & OF_SS)
986 is->shiftState = S_SS3;
987 discard_buffered(is);
988 return;
989 case LS0:
990 if (is->outputFlags & OF_LS)
991 is->glp = &G0(is);
992 discard_buffered(is);
993 return;
994 case LS1:
995 if (is->outputFlags & OF_LS)
996 is->glp = &G1(is);
997 discard_buffered(is);
998 return;
999 case ESC:
1000 assert(is->buffered_count >= 2);
1001 switch (is->buffered[1]) {
1002 case SS2_7:
1003 if (is->outputFlags & OF_SS)
1004 is->shiftState = S_SS2;
1005 discard_buffered(is);
1006 return;
1007 case SS3_7:
1008 if (is->outputFlags & OF_SS)
1009 is->shiftState = S_SS3;
1010 discard_buffered(is);
1011 return;
1012 case LS2_7:
1013 if (is->outputFlags & OF_SS)
1014 is->glp = &G2(is);
1015 discard_buffered(is);
1016 return;
1017 case LS3_7:
1018 if (is->outputFlags & OF_LS)
1019 is->glp = &G3(is);
1020 discard_buffered(is);
1021 return;
1022 case LS1R_7:
1023 if (is->outputFlags & OF_LS)
1024 is->grp = &G1(is);
1025 discard_buffered(is);
1026 return;
1027 case LS2R_7:
1028 if (is->outputFlags & OF_LS)
1029 is->grp = &G2(is);
1030 discard_buffered(is);
1031 return;
1032 case LS3R_7:
1033 if (is->outputFlags & OF_LS)
1034 is->grp = &G3(is);
1035 discard_buffered(is);
1036 return;
1037 default:
1038 terminateEsc(is, fd,
1039 is->buffered + 1,
1040 (unsigned) (is->buffered_count - 1));
1041 break;
1042 }
1043 return;
1044 default:
1045 outbuf_buffered(is, fd);
1046 }
1047 }
1048
1049 static void
terminateEsc(Iso2022Ptr is,int fd,unsigned char * s_start,unsigned count)1050 terminateEsc(Iso2022Ptr is, int fd, unsigned char *s_start, unsigned count)
1051 {
1052 const CharsetRec *charset;
1053
1054 /* ISO 2022 doesn't allow 2C, but Emacs/MULE uses it in 7-bit
1055 mode */
1056
1057 if ((s_start[0] == 0x28 || s_start[0] == 0x29 ||
1058 s_start[0] == 0x2A || s_start[0] == 0x2B ||
1059 s_start[0] == 0x2C || s_start[0] == 0x2D ||
1060 s_start[0] == 0x2E || s_start[0] == 0x2F) &&
1061 count >= 2) {
1062 if (is->outputFlags & OF_SELECT) {
1063 if (s_start[0] <= 0x2B)
1064 charset = getCharset(s_start[1], T_94);
1065 else
1066 charset = getCharset(s_start[1], T_96);
1067 switch (s_start[0]) {
1068 case 0x28:
1069 case 0x2C:
1070 G0(is) = charset;
1071 break;
1072 case 0x29:
1073 case 0x2D:
1074 G1(is) = charset;
1075 break;
1076 case 0x2A:
1077 case 0x2E:
1078 G2(is) = charset;
1079 break;
1080 case 0x2B:
1081 case 0x2F:
1082 G3(is) = charset;
1083 break;
1084 }
1085 }
1086 discard_buffered(is);
1087 } else if (s_start[0] == 0x24 && count == 2) {
1088 if (is->outputFlags & OF_SELECT) {
1089 charset = getCharset(s_start[1], T_9494);
1090 G0(is) = charset;
1091 }
1092 discard_buffered(is);
1093 } else if (s_start[0] == 0x24 && count >= 2 &&
1094 (s_start[1] == 0x28 || s_start[1] == 0x29 ||
1095 s_start[1] == 0x2A || s_start[1] == 0x2B ||
1096 s_start[1] == 0x2D || s_start[1] == 0x2E ||
1097 s_start[1] == 0x2F) &&
1098 count >= 3) {
1099 if (is->outputFlags & OF_SELECT) {
1100 if (s_start[1] <= 0x2B)
1101 charset = getCharset(s_start[2], T_9494);
1102 else
1103 charset = getCharset(s_start[2], T_9696);
1104 switch (s_start[1]) {
1105 case 0x28:
1106 G0(is) = charset;
1107 break;
1108 case 0x29:
1109 case 0x2D:
1110 G1(is) = charset;
1111 break;
1112 case 0x2A:
1113 case 0x2E:
1114 G2(is) = charset;
1115 break;
1116 case 0x2B:
1117 case 0x2F:
1118 G3(is) = charset;
1119 break;
1120 }
1121 }
1122 discard_buffered(is);
1123 } else
1124 outbuf_buffered(is, fd);
1125 }
1126
1127 #ifdef NO_LEAKS
/*
 * Leak-checking hook for this file.  It deliberately does nothing: the only
 * file-scope storage here is a fixed-size array and a counter.
 */
void
iso2022_leaks(void)
{
    /* nothing to release */
}
1132 #endif
1133