1 /*
2  * Copyright (c) 2013, 2014, 2019  Paul Mattes.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the names of Paul Mattes nor the names of his contributors
13  *       may be used to endorse or promote products derived from this software
14  *       without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY PAUL MATTES "AS IS" AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL PAUL MATTES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * pr3287 custom translation table support (-xtable).
30  */
31 
32 #include "globals.h"
33 #include <errno.h>
34 
35 #include "xtablec.h"
36 
/*
 * Symbolic names for ASCII control characters, as accepted on the ASCII side
 * of a translation line. Names are compared without regard to case by the
 * parser. The list is terminated by an entry with a NULL name.
 */
static const struct {
    const char *name;	/* symbolic name */
    int value;		/* character code the name stands for */
} cc[] = {
    { "bs",     '\b' },	/* backspace */
    { "cr",     '\r' },	/* carriage return */
    { "bel",    '\a' },	/* bell */
    { "esc",    27 },	/* escape */
    { "escape", 27 },	/* escape, spelled out */
    { "ff",     '\f' },	/* form feed */
    { "ht",     '\t' },	/* horizontal tab */
    { "lf",     10 },	/* line feed */
    { "nl",     10 },	/* newline, same code as lf */
    { "nul",    0 },	/* null */
    { "space",  32 },	/* blank */
    { "tab",    '\t' },	/* horizontal tab, same code as ht */
    { "vt",     '\v' },	/* vertical tab */
    { NULL,     0 }	/* end of list */
};
57 
/*
 * Translation table, one entry per 8-bit code.
 * An entry's len is -1 for no translation, 0 for an empty translation, or
 * the number of bytes stored in expansion.
 */
#define MAX_EX	64	/* most bytes one expansion can hold */

struct xtable_entry {
    int len;
    unsigned char expansion[MAX_EX];
};
static struct xtable_entry xls[256];

/* Nonzero once xtable_init() has reset the table. */
static int xtable_initted;
65 
/*
 * Expand a 1-3 digit octal escape.
 * On entry (*s) points to the first digit, which is not validated here.
 * On return (*s) points to the last digit consumed, so the caller's own
 * increment steps past the whole escape.
 * Returns the decoded value, converted to char.
 */
static char
loct(char **s)
{
    char *t = *s;
    unsigned int r = (unsigned int)(*t - '0');
    int extra;

    /*
     * Take at most two more digits, stopping at the first character that is
     * not an octal digit so that whatever follows the escape (such as a
     * closing quote) is left for the caller.
     */
    for (extra = 0; extra < 2 && t[1] >= '0' && t[1] <= '7'; extra++) {
        r = (r * 8) + (unsigned int)(*++t - '0');
    }

    *s = t;
    return (char)r;
}
88 
/*
 * Translate a hex digit to its value, 0..15.
 * Return -1 for an invalid digit.
 */
static int
xdigit(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    return -1;
}

/*
 * Expand 1-2 hex characters.
 * (*s) points to the character before the first.
 * On success, point (*s) at the last digit consumed and return the value
 * (0..255).
 * If the first character is not a hex digit, return -1 and leave (*s) alone.
 */
static int
lhex(char **s)
{
    char *t = *s;
    int r;	/* int, not char: values of 0x80 and up must stay non-negative
		   so they are not mistaken for the -1 error return */
    int d;

    /* The first digit is mandatory. */
    d = xdigit(*(t + 1));
    if (d < 0) {
        return -1;
    }
    r = d;
    t++;

    /* The second digit is optional. */
    d = xdigit(*(t + 1));
    if (d >= 0) {
        r = (r * 16) + d;
        t++;
    }

    *s = t;
    return r;
}
134 
/*
 * Character classification for the table parser.
 *  is_white(c)    blank, tab, carriage return or newline
 *  is_delim(c)    whitespace or the end of the string
 *  is_comment(s)  the text at s starts a comment: '!', '#' or "//"
 * Each macro may evaluate its argument more than once, so arguments must not
 * have side effects.
 */
#define is_white(c)	((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == '\n')
#define is_delim(c)	(is_white(c) || (c) == '\0')
#define is_comment(s)	(*(s) == '!' || *(s) == '#' || !strncmp(s, "//", 2))
138 
139 /* Initialize the translation table. */
140 int
xtable_init(const char * filename)141 xtable_init(const char *filename)
142 {
143     FILE *f;
144     char buf[1024];
145     int lno = 0;
146     int i;
147     int rc = 0;
148 
149     /* Initialize the translation table. */
150     for (i = 0; i < 256; i++) {
151 	xls[i].len = -1;
152     }
153 
154     /* We're initted well enough for xtable_lookup() to be called. */
155     xtable_initted = 1;
156 
157     /* Open the file. */
158     f = fopen(filename, "r");
159     if (f == NULL) {
160 	errmsg("%s: %s", filename, strerror(errno));
161 	return -1;
162     }
163 
164     /* Read it. */
165     while (fgets(buf, sizeof(buf), f) != NULL) {
166 	char *s;
167 	unsigned long ebc, asc;
168 	char *p;
169 	char xl[64];
170 	int sx;
171 
172 	lno++;
173 	s = buf;
174 
175 	while (is_white(*s)) {
176 	    s++;
177 	}
178 	/* Skip empty lines. */
179 	if (!*s) {
180 	    continue;
181 	}
182 	/* Skip comment lines. */
183 	if (is_comment(s)) {
184 	    continue;
185 	}
186 
187 	/*
188 	 * The format of a line is:
189 	 *  ebcdic EBCDIC-code ascii [ASCII-code]...
190 	 * An EBCDIC code can be specified as:
191 	 *  X'nn'    Hexadecimal
192 	 *  0xnn     Hexadecimal
193 	 *  0nn      Octal
194 	 *  nn       Decimal
195 	 * An ASCII code can be specified as:
196 	 *  0xn      Hexadecimal
197 	 *  0n       Octal
198 	 *  n        Decimal
199 	 *  ^X       Control code
200 	 *  CR NL LF FF NUL TAB SPACE ESC ESCAPE
201 	 *           More control codes
202 	 *  "text"   Literal text
203 	 * Named and literal characters are not supported on the EBCDIC
204 	 *  side because their definition depends on the host codepage.
205 	 * Literal characters are supported on the ASCII side, though
206 	 *  their interpretation of single characters depends on the
207 	 *  local character set.
208 	 */
209 
210 	/* Parse 'ebcdic'. */
211 	if (strncasecmp(s, "ebcdic", strlen("ebcdic")) ||
212 	    !is_white(*(s + strlen("ebcdic")))) {
213 	    errmsg("%s:%d: missing 'ebcdic' keyword", filename, lno);
214 	    rc = -1;
215 	    goto done;
216 	}
217 
218 	s += strlen("ebcdic");
219 	while (is_white(*s)) {
220 		s++;
221 	}
222 	/* Skip empty lines. */
223 	if (!*s) {
224 	    continue;
225 	}
226 	/* Skip comment lines. */
227 	if (is_comment(s)) {
228 	    continue;
229 	}
230 
231 	/* Parse the EBCDIC code. */
232 	if (!strncasecmp(s, "X'", 2)) {
233 	    ebc = strtoul(s + 2, &p, 16);
234 	    if (*p != '\'' || !is_delim(*(p + 1))) {
235 		errmsg("%s:%d: EBCDIC code X'nn' syntax error", filename, lno);
236 		rc = -1;
237 		goto done;
238 	    }
239 	    p++;
240 	} else {
241 	    ebc = strtoul(s, &p, 0);
242 	    if (!is_delim(*p)) {
243 		errmsg("%s:%d: EBCDIC code number syntax error", filename, lno);
244 		rc = -1;
245 		goto done;
246 	    }
247 	}
248 	if (ebc < 64) {
249 	    errmsg("%s:%d: EBCDIC code < 64", filename, lno);
250 	    rc = -1;
251 	    goto done;
252 	}
253 	if (ebc > 255) {
254 	    errmsg("%s:%d: EBCDIC code > 255", filename, lno);
255 	    rc = -1;
256 	    goto done;
257 	}
258 	s = p;
259 	while (is_white(*s)) {
260 	    s++;
261 	}
262 
263 	/* Parse 'ascii'. */
264 	if (strncasecmp(s, "ascii", strlen("ascii")) ||
265 	    !is_white(*(s + strlen("ascii")))) {
266 	    errmsg("%s:%d: missing 'ascii' keyword", filename, lno);
267 	    rc = -1;
268 	    goto done;
269 	}
270 
271 	s += strlen("ascii");
272 	/* Skip empty lines. */
273 	if (!*s) {
274 	    continue;
275 	}
276 	/* Skip comment lines. */
277 	if (is_comment(s)) {
278 	    continue;
279 	}
280 
281 	/* Parse the ASCII codes. */
282 	sx = 0;
283 	while (*s) {
284 	    while (is_white(*s)) {
285 		s++;
286 	    }
287 	    if (!*s || is_comment(s)) {
288 		break;
289 	    }
290 	    if (*s >= '0' && *s <= '9') {
291 		/* Looks like a number. */
292 		asc = strtoul(s, &p, 0);
293 		if (!is_delim(*p)) {
294 		    errmsg("%s:%d:%zd: number syntax error", filename, lno,
295 			    s - buf + 1);
296 		    rc = -1;
297 		    goto done;
298 		}
299 		s = p;
300 	    } else if (*s == '^') {
301 		/* Looks like a control character. */
302 		if (*(s + 1) >= '@' &&
303 		    *(s + 1) <= '_' &&
304 		    is_delim(*(s + 2))) {
305 		    asc = *(s + 1) - '@';
306 		} else {
307 		    errmsg("%s:%d:%zd: control character syntax error",
308 			    filename, lno, s - buf + 1);
309 		    rc = -1;
310 		    goto done;
311 		}
312 		s += 2;
313 	    } else if (*s == '"') {
314 		char *t;
315 
316 		/* Quoted text. */
317 		t = ++s;
318 		for (;;) {
319 		    t = strchr(t, '"');
320 		    if (t != s && *(t - 1) == '\\') {
321 			t++;
322 			continue;
323 		    }
324 		    if (t == NULL || !is_delim(*(t + 1))) {
325 			errmsg("%s:%d:%zd: quoted text syntax error ",
326 				filename, lno, s - buf + 1);
327 			rc = -1;
328 			goto done;
329 		    }
330 		    break;
331 		}
332 		while (s < t) {
333 		    int c = *s++;
334 
335 		    if (c == '\\') {
336 			switch (*s) {
337 			case '0':
338 			    c = loct(&s);
339 			    break;
340 			case 'a':
341 			    c = '\a';
342 			    break;
343 			case 'b':
344 			    c = '\b';
345 			    break;
346 			case 'f':
347 			    c = '\f';
348 			    break;
349 			case 'n':
350 			    c = '\n';
351 			    break;
352 			case 'r':
353 			    c = '\r';
354 			    break;
355 			case 't':
356 			    c = '\t';
357 			    break;
358 			case 'v':
359 			    c = '\v';
360 			    break;
361 			case 'x':
362 			    c = lhex(&s);
363 			    if (c < 0) {
364 				errmsg("%s:%d:%zd: \\x syntax error ",
365 					filename, lno, s - buf + 1);
366 				rc = -1;
367 				goto done;
368 			    }
369 			    break;
370 			default:
371 			    c = *s;
372 			    break;
373 			}
374 			s++;
375 		    }
376 		    if ((size_t)sx > sizeof(xl)) {
377 			errmsg("%s:%d: too many (%d) ASCII characters",
378 				filename, lno, sx);
379 			rc = -1;
380 			goto done;
381 		    }
382 		    xl[sx++] = c;
383 		}
384 		/* Skip the trailing double quote. */
385 		s++;
386 
387 		/*
388 		 * Don't fall through to the logic that adds
389 		 * one character to the translation.
390 		 */
391 		continue;
392 	    } else {
393 		int j;
394 
395 		/* Might be a symbolic character. */
396 		for (j = 0; cc[j].name != NULL; j++) {
397 		    size_t sl = strlen(cc[j].name);
398 
399 		    if (!strncasecmp(cc[j].name, s, sl) &&
400 			is_delim(s[sl])) {
401 			asc = cc[j].value;
402 			s += sl;
403 			break;
404 		    }
405 		}
406 		if (cc[j].name == NULL) {
407 		    errmsg("%s:%d:%zd: unknown token", filename, lno,
408 			    s - buf + 1);
409 		    rc = -1;
410 		    goto done;
411 		}
412 	    }
413 	    if (asc > 255) {
414 		errmsg("%s:%d: ASCII code > 255", filename, lno);
415 		rc = -1;
416 		goto done;
417 	    }
418 	    if ((size_t)sx > sizeof(xl)) {
419 		errmsg("%s:%d: too many (%d) ASCII characters", filename,
420 			lno, sx);
421 		rc = -1;
422 		goto done;
423 	    }
424 	    xl[sx++] = (char)asc;
425 	}
426 
427 	/* Save the translation. */
428 	xls[ebc].len = sx;
429 	memcpy(xls[ebc].expansion, xl, sx);
430     }
431 
432 #if defined(DUMP_TABLE) /*[*/
433     {
434 	int ebc;
435 
436 	for (ebc = 0; ebc < 256; ebc++) {
437 	    if (xls[ebc].len >= 0) {
438 		int k;
439 
440 		printf("X'%02X' ->", ebc);
441 		for (k = 0; k < xls[ebc].len; k++) {
442 		    printf(" 0x%02x", (unsigned char)xls[ebc].expansion[k]);
443 		}
444 		printf("\n");
445 	    }
446 	}
447 	fflush(stdout); /* for Windows */
448     }
449 #endif /*]*/
450 
451 done:
452     fclose(f);
453     return rc;
454 }
455 
456 /*
457  * Translate an EBCDIC code to ASCII, using the custom table.
458  * Returns:
459  *   -1 no translation defined (use default table)
460  *    0 expand to nothing
461  *    n expand to <n> returned characters
462  */
463 int
xtable_lookup(unsigned char ebc,unsigned char ** r)464 xtable_lookup(unsigned char ebc, unsigned char **r)
465 {
466     if (!xtable_initted || ebc < 0x40) {
467 	*r = NULL;
468 	return -1;
469     }
470 
471     if (xls[ebc].len > 0) {
472 	*r = xls[ebc].expansion;
473     } else if (xls[ebc].len == 0) {
474 	*r = (unsigned char *)"";
475     } else {
476 	*r = NULL;
477     }
478     return xls[ebc].len;
479 }
480