1 /*
2 * Copyright (c) 2013, 2014, 2019 Paul Mattes.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the names of Paul Mattes nor the names of his contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY PAUL MATTES "AS IS" AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL PAUL MATTES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * pr3287 custom translation table support (-xtable).
30 */
31
32 #include "globals.h"
33 #include <errno.h>
34
35 #include "xtablec.h"
36
/*
 * Symbolically-named ASCII control characters.
 *
 * Each entry pairs a token name with the character value it stands for.
 * The list is terminated by an entry whose name is NULL. The table is only
 * ever read, so it is const.
 */
static const struct {
    const char *name;	/* token text */
    int value;		/* character value the token expands to */
} cc[] = {
    { "bs",	'\b' },
    { "cr",	'\r' },
    { "bel",	'\a' },
    { "esc",	27 },
    { "escape",	27 },
    { "ff",	'\f' },
    { "ht",	'\t' },
    { "lf",	10 },
    { "nl",	10 },
    { "nul",	0 },
    { "space",	32 },
    { "tab",	'\t' },
    { "vt",	'\v' },
    { NULL,	0 }	/* end-of-table sentinel */
};
57
/*
 * Custom translation table: one slot per possible 8-bit code.
 * MAX_EX is the capacity, in bytes, of a single slot's expansion.
 */
enum { MAX_EX = 64 };

static struct {
    int len;	/* -1 for no translation, 0 for empty translation */
    unsigned char expansion[MAX_EX];
} xls[256];

/* Nonzero once xtable_init() has reset the table. */
static int xtable_initted;
65
/*
 * Expand 1-3 octal characters.
 * (*s) points to the first, which the caller has already checked.
 * Point (*s) at the last digit consumed.
 *
 * Returns the decoded value, converted to char.
 */
static char
loct(char **s)
{
    char *t = *s;
    int r = *t - '0';
    int extra;

    /*
     * Take up to two more digits. Only '0'..'7' may be consumed: anything
     * else (in particular the closing double quote or the terminating NUL)
     * ends the number and must be left for the caller.
     */
    for (extra = 0; extra < 2; extra++) {
        char next = *(t + 1);

        if (next < '0' || next > '7') {
            break;
        }
        r = (r * 8) + (next - '0');
        t++;
    }

    *s = t;
    return (char)r;
}
88
/*
 * Translate a hex digit to 0..15.
 * Return -1 for an invalid digit.
 */
static int
xdigit(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    return -1;
}

/*
 * Expand 1-2 hex characters.
 * (*s) points to the character before the first.
 * Point (*s) at the last.
 *
 * Returns the decoded value (0..255), or -1 if the first character is not
 * a hex digit, in which case (*s) is left unchanged.
 */
static int
lhex(char **s)
{
    char *t = *s;
    int r;	/* int, not char: 0x80..0xff must not turn negative, because
		   a negative return value means "syntax error" */
    int d;

    d = xdigit(*(t + 1));
    if (d < 0) {
        return -1;
    }
    r = d;
    t++;

    /* Optional second digit. */
    d = xdigit(*(t + 1));
    if (d >= 0) {
        r = (r * 16) + d;
        t++;
    }

    *s = t;
    return r;
}
134
/*
 * Character classification helpers for the table parser.
 *   is_white(c)    space, tab, carriage return or newline
 *   is_delim(c)    white space or the terminating NUL
 *   is_comment(s)  s starts with '!', '#' or "//"
 * Note that the arguments may be evaluated more than once.
 */
#define is_white(c)	((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == '\n')
#define is_delim(c)	(is_white(c) || (c) == '\0')
#define is_comment(s)	(*(s) == '!' || *(s) == '#' || !strncmp((s), "//", 2))
138
139 /* Initialize the translation table. */
140 int
xtable_init(const char * filename)141 xtable_init(const char *filename)
142 {
143 FILE *f;
144 char buf[1024];
145 int lno = 0;
146 int i;
147 int rc = 0;
148
149 /* Initialize the translation table. */
150 for (i = 0; i < 256; i++) {
151 xls[i].len = -1;
152 }
153
154 /* We're initted well enough for xtable_lookup() to be called. */
155 xtable_initted = 1;
156
157 /* Open the file. */
158 f = fopen(filename, "r");
159 if (f == NULL) {
160 errmsg("%s: %s", filename, strerror(errno));
161 return -1;
162 }
163
164 /* Read it. */
165 while (fgets(buf, sizeof(buf), f) != NULL) {
166 char *s;
167 unsigned long ebc, asc;
168 char *p;
169 char xl[64];
170 int sx;
171
172 lno++;
173 s = buf;
174
175 while (is_white(*s)) {
176 s++;
177 }
178 /* Skip empty lines. */
179 if (!*s) {
180 continue;
181 }
182 /* Skip comment lines. */
183 if (is_comment(s)) {
184 continue;
185 }
186
187 /*
188 * The format of a line is:
189 * ebcdic EBCDIC-code ascii [ASCII-code]...
190 * An EBCDIC code can be specified as:
191 * X'nn' Hexadecimal
192 * 0xnn Hexadecimal
193 * 0nn Octal
194 * nn Decimal
195 * An ASCII code can be specified as:
196 * 0xn Hexadecimal
197 * 0n Octal
198 * n Decimal
199 * ^X Control code
200 * CR NL LF FF NUL TAB SPACE ESC ESCAPE
201 * More control codes
202 * "text" Literal text
203 * Named and literal characters are not supported on the EBCDIC
204 * side because their definition depends on the host codepage.
205 * Literal characters are supported on the ASCII side, though
206 * their interpretation of single characters depends on the
207 * local character set.
208 */
209
210 /* Parse 'ebcdic'. */
211 if (strncasecmp(s, "ebcdic", strlen("ebcdic")) ||
212 !is_white(*(s + strlen("ebcdic")))) {
213 errmsg("%s:%d: missing 'ebcdic' keyword", filename, lno);
214 rc = -1;
215 goto done;
216 }
217
218 s += strlen("ebcdic");
219 while (is_white(*s)) {
220 s++;
221 }
222 /* Skip empty lines. */
223 if (!*s) {
224 continue;
225 }
226 /* Skip comment lines. */
227 if (is_comment(s)) {
228 continue;
229 }
230
231 /* Parse the EBCDIC code. */
232 if (!strncasecmp(s, "X'", 2)) {
233 ebc = strtoul(s + 2, &p, 16);
234 if (*p != '\'' || !is_delim(*(p + 1))) {
235 errmsg("%s:%d: EBCDIC code X'nn' syntax error", filename, lno);
236 rc = -1;
237 goto done;
238 }
239 p++;
240 } else {
241 ebc = strtoul(s, &p, 0);
242 if (!is_delim(*p)) {
243 errmsg("%s:%d: EBCDIC code number syntax error", filename, lno);
244 rc = -1;
245 goto done;
246 }
247 }
248 if (ebc < 64) {
249 errmsg("%s:%d: EBCDIC code < 64", filename, lno);
250 rc = -1;
251 goto done;
252 }
253 if (ebc > 255) {
254 errmsg("%s:%d: EBCDIC code > 255", filename, lno);
255 rc = -1;
256 goto done;
257 }
258 s = p;
259 while (is_white(*s)) {
260 s++;
261 }
262
263 /* Parse 'ascii'. */
264 if (strncasecmp(s, "ascii", strlen("ascii")) ||
265 !is_white(*(s + strlen("ascii")))) {
266 errmsg("%s:%d: missing 'ascii' keyword", filename, lno);
267 rc = -1;
268 goto done;
269 }
270
271 s += strlen("ascii");
272 /* Skip empty lines. */
273 if (!*s) {
274 continue;
275 }
276 /* Skip comment lines. */
277 if (is_comment(s)) {
278 continue;
279 }
280
281 /* Parse the ASCII codes. */
282 sx = 0;
283 while (*s) {
284 while (is_white(*s)) {
285 s++;
286 }
287 if (!*s || is_comment(s)) {
288 break;
289 }
290 if (*s >= '0' && *s <= '9') {
291 /* Looks like a number. */
292 asc = strtoul(s, &p, 0);
293 if (!is_delim(*p)) {
294 errmsg("%s:%d:%zd: number syntax error", filename, lno,
295 s - buf + 1);
296 rc = -1;
297 goto done;
298 }
299 s = p;
300 } else if (*s == '^') {
301 /* Looks like a control character. */
302 if (*(s + 1) >= '@' &&
303 *(s + 1) <= '_' &&
304 is_delim(*(s + 2))) {
305 asc = *(s + 1) - '@';
306 } else {
307 errmsg("%s:%d:%zd: control character syntax error",
308 filename, lno, s - buf + 1);
309 rc = -1;
310 goto done;
311 }
312 s += 2;
313 } else if (*s == '"') {
314 char *t;
315
316 /* Quoted text. */
317 t = ++s;
318 for (;;) {
319 t = strchr(t, '"');
320 if (t != s && *(t - 1) == '\\') {
321 t++;
322 continue;
323 }
324 if (t == NULL || !is_delim(*(t + 1))) {
325 errmsg("%s:%d:%zd: quoted text syntax error ",
326 filename, lno, s - buf + 1);
327 rc = -1;
328 goto done;
329 }
330 break;
331 }
332 while (s < t) {
333 int c = *s++;
334
335 if (c == '\\') {
336 switch (*s) {
337 case '0':
338 c = loct(&s);
339 break;
340 case 'a':
341 c = '\a';
342 break;
343 case 'b':
344 c = '\b';
345 break;
346 case 'f':
347 c = '\f';
348 break;
349 case 'n':
350 c = '\n';
351 break;
352 case 'r':
353 c = '\r';
354 break;
355 case 't':
356 c = '\t';
357 break;
358 case 'v':
359 c = '\v';
360 break;
361 case 'x':
362 c = lhex(&s);
363 if (c < 0) {
364 errmsg("%s:%d:%zd: \\x syntax error ",
365 filename, lno, s - buf + 1);
366 rc = -1;
367 goto done;
368 }
369 break;
370 default:
371 c = *s;
372 break;
373 }
374 s++;
375 }
376 if ((size_t)sx > sizeof(xl)) {
377 errmsg("%s:%d: too many (%d) ASCII characters",
378 filename, lno, sx);
379 rc = -1;
380 goto done;
381 }
382 xl[sx++] = c;
383 }
384 /* Skip the trailing double quote. */
385 s++;
386
387 /*
388 * Don't fall through to the logic that adds
389 * one character to the translation.
390 */
391 continue;
392 } else {
393 int j;
394
395 /* Might be a symbolic character. */
396 for (j = 0; cc[j].name != NULL; j++) {
397 size_t sl = strlen(cc[j].name);
398
399 if (!strncasecmp(cc[j].name, s, sl) &&
400 is_delim(s[sl])) {
401 asc = cc[j].value;
402 s += sl;
403 break;
404 }
405 }
406 if (cc[j].name == NULL) {
407 errmsg("%s:%d:%zd: unknown token", filename, lno,
408 s - buf + 1);
409 rc = -1;
410 goto done;
411 }
412 }
413 if (asc > 255) {
414 errmsg("%s:%d: ASCII code > 255", filename, lno);
415 rc = -1;
416 goto done;
417 }
418 if ((size_t)sx > sizeof(xl)) {
419 errmsg("%s:%d: too many (%d) ASCII characters", filename,
420 lno, sx);
421 rc = -1;
422 goto done;
423 }
424 xl[sx++] = (char)asc;
425 }
426
427 /* Save the translation. */
428 xls[ebc].len = sx;
429 memcpy(xls[ebc].expansion, xl, sx);
430 }
431
432 #if defined(DUMP_TABLE) /*[*/
433 {
434 int ebc;
435
436 for (ebc = 0; ebc < 256; ebc++) {
437 if (xls[ebc].len >= 0) {
438 int k;
439
440 printf("X'%02X' ->", ebc);
441 for (k = 0; k < xls[ebc].len; k++) {
442 printf(" 0x%02x", (unsigned char)xls[ebc].expansion[k]);
443 }
444 printf("\n");
445 }
446 }
447 fflush(stdout); /* for Windows */
448 }
449 #endif /*]*/
450
451 done:
452 fclose(f);
453 return rc;
454 }
455
456 /*
457 * Translate an EBCDIC code to ASCII, using the custom table.
458 * Returns:
459 * -1 no translation defined (use default table)
460 * 0 expand to nothing
461 * n expand to <n> returned characters
462 */
463 int
xtable_lookup(unsigned char ebc,unsigned char ** r)464 xtable_lookup(unsigned char ebc, unsigned char **r)
465 {
466 if (!xtable_initted || ebc < 0x40) {
467 *r = NULL;
468 return -1;
469 }
470
471 if (xls[ebc].len > 0) {
472 *r = xls[ebc].expansion;
473 } else if (xls[ebc].len == 0) {
474 *r = (unsigned char *)"";
475 } else {
476 *r = NULL;
477 }
478 return xls[ebc].len;
479 }
480