1 /* $OpenBSD: parse.c,v 1.23 2018/04/26 12:42:51 guenther Exp $ */
2 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
3
4 /*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
38
39 #include "hexdump.h"
40
41 FU *endfu; /* format at end-of-data */
42
43 static __dead void badcnt(char *);
44 static __dead void badconv(char *);
45 static __dead void badfmt(const char *);
46 static __dead void badsfmt(void);
47 static void escape(char *);
48
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and hand
 *	each of them to add().  Leading white space is skipped; lines that
 *	are empty or whose first non-blank character is '#' are ignored.
 *	Exits through err() if the file cannot be opened or memory runs out.
 */
void
addfile(char *name)
{
	FILE *stream;
	char *line, *copy, *start;
	size_t linelen;

	stream = fopen(name, "r");
	if (stream == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	for (;;) {
		line = fgetln(stream, &linelen);
		if (line == NULL)
			break;

		if (line[linelen - 1] != '\n') {
			/*
			 * Final line without a newline: fgetln() gives us no
			 * room for a terminator, so work on a private copy.
			 */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else {
			/* Turn the newline into the string terminator. */
			line[linelen - 1] = '\0';
		}

		start = line;
		while (isspace((unsigned char)*start))
			start++;

		if (*start != '\0' && *start != '#')
			add(start);
	}

	free(copy);
	(void)fclose(stream);
}
79
80 void
add(const char * fmt)81 add(const char *fmt)
82 {
83 const char *p;
84 static FS **nextfs;
85 FS *tfs;
86 FU *tfu, **nextfu;
87 const char *savep;
88
89 /* start new linked list of format units */
90 if ((tfs = calloc(1, sizeof(FS))) == NULL)
91 err(1, NULL);
92 if (!fshead)
93 fshead = tfs;
94 else
95 *nextfs = tfs;
96 nextfs = &tfs->nextfs;
97 nextfu = &tfs->nextfu;
98
99 /* take the format string and break it up into format units */
100 for (p = fmt;;) {
101 /* skip leading white space */
102 for (; isspace((unsigned char)*p); ++p);
103 if (!*p)
104 break;
105
106 /* allocate a new format unit and link it in */
107 if ((tfu = calloc(1, sizeof(FU))) == NULL)
108 err(1, NULL);
109 *nextfu = tfu;
110 nextfu = &tfu->nextfu;
111 tfu->reps = 1;
112
113 /* if leading digit, repetition count */
114 if (isdigit((unsigned char)*p)) {
115 for (savep = p; isdigit((unsigned char)*p); ++p);
116 if (!isspace((unsigned char)*p) && *p != '/')
117 badfmt(fmt);
118 /* may overwrite either white space or slash */
119 tfu->reps = atoi(savep);
120 tfu->flags = F_SETREP;
121 /* skip trailing white space */
122 for (++p; isspace((unsigned char)*p); ++p);
123 }
124
125 /* skip slash and trailing white space */
126 if (*p == '/')
127 while (isspace((unsigned char)*++p));
128
129 /* byte count */
130 if (isdigit((unsigned char)*p)) {
131 for (savep = p; isdigit((unsigned char)*p); ++p);
132 if (!isspace((unsigned char)*p))
133 badfmt(fmt);
134 tfu->bcnt = atoi(savep);
135 /* skip trailing white space */
136 for (++p; isspace((unsigned char)*p); ++p);
137 }
138
139 /* format */
140 if (*p != '"')
141 badfmt(fmt);
142 for (savep = ++p; *p != '"';)
143 if (*p++ == 0)
144 badfmt(fmt);
145 if ((tfu->fmt = strndup(savep, p - savep)) == NULL)
146 err(1, NULL);
147 escape(tfu->fmt);
148 p++;
149 }
150 }
151
/*
 * Characters that may appear between '%' and the conversion character.
 * The '.' is deliberately first: code that must handle the precision
 * itself scans with `spec + 1', which stops at the '.'.
 */
static const char *const spec = ".#-+ 0123456789";
153
154 int
size(FS * fs)155 size(FS *fs)
156 {
157 FU *fu;
158 int bcnt, cursize;
159 char *fmt;
160 int prec;
161
162 /* figure out the data block size needed for each format unit */
163 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
164 if (fu->bcnt) {
165 cursize += fu->bcnt * fu->reps;
166 continue;
167 }
168 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
169 if (*fmt != '%')
170 continue;
171 /*
172 * skip any special chars -- save precision in
173 * case it's a %s format.
174 */
175 while (*++fmt && strchr(spec + 1, *fmt));
176 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
177 prec = atoi(fmt);
178 while (isdigit((unsigned char)*++fmt));
179 }
180 switch(*fmt) {
181 case 'c':
182 bcnt += 1;
183 break;
184 case 'd': case 'i': case 'o': case 'u':
185 case 'x': case 'X':
186 bcnt += 4;
187 break;
188 case 'e': case 'E': case 'f': case 'g': case 'G':
189 bcnt += 8;
190 break;
191 case 's':
192 bcnt += prec;
193 break;
194 case '_':
195 switch(*++fmt) {
196 case 'c': case 'p': case 'u':
197 bcnt += 1;
198 break;
199 }
200 }
201 }
202 cursize += bcnt * fu->reps;
203 }
204 return (cursize);
205 }
206
207 void
rewrite(FS * fs)208 rewrite(FS *fs)
209 {
210 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
211 PR *pr, **nextpr;
212 FU *fu;
213 char *p1, *p2;
214 char savech, *fmtp, cs[4];
215 int nconv, prec;
216
217 nextpr = NULL;
218 prec = 0;
219 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
220 /*
221 * Break each format unit into print units; each conversion
222 * character gets its own.
223 */
224 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
225 if ((pr = calloc(1, sizeof(PR))) == NULL)
226 err(1, NULL);
227 if (!fu->nextpr)
228 fu->nextpr = pr;
229 else
230 *nextpr = pr;
231
232 /* Skip preceding text and up to the next % sign. */
233 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
234
235 /* Only text in the string. */
236 if (!*p1) {
237 pr->fmt = fmtp;
238 pr->flags = F_TEXT;
239 break;
240 }
241
242 /*
243 * Get precision for %s -- if have a byte count, don't
244 * need it.
245 */
246 if (fu->bcnt) {
247 sokay = USEBCNT;
248 /* Skip to conversion character. */
249 for (++p1; *p1 && strchr(spec, *p1); ++p1);
250 } else {
251 /* Skip any special chars, field width. */
252 while (*++p1 && strchr(spec + 1, *p1));
253 if (*p1 == '.' &&
254 isdigit((unsigned char)*++p1)) {
255 sokay = USEPREC;
256 prec = atoi(p1);
257 while (isdigit((unsigned char)*++p1))
258 continue;
259 } else
260 sokay = NOTOKAY;
261 }
262
263 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
264 cs[0] = *p1; /* Set conversion string. */
265 cs[1] = '\0';
266
267 /*
268 * Figure out the byte count for each conversion;
269 * rewrite the format as necessary, set up blank-
270 * padding for end of data.
271 */
272 switch(cs[0]) {
273 case 'c':
274 pr->flags = F_CHAR;
275 switch(fu->bcnt) {
276 case 0: case 1:
277 pr->bcnt = 1;
278 break;
279 default:
280 p1[1] = '\0';
281 badcnt(p1);
282 }
283 break;
284 case 'd': case 'i':
285 case 'o': case 'u': case 'x': case 'X':
286 if (cs[0] == 'd' || cs[0] == 'i')
287 pr->flags = F_INT;
288 else
289 pr->flags = F_UINT;
290
291 cs[3] = '\0';
292 cs[2] = cs[0];
293 cs[1] = 'l';
294 cs[0] = 'l';
295 switch(fu->bcnt) {
296 case 0: case 4:
297 pr->bcnt = 4;
298 break;
299 case 1:
300 pr->bcnt = 1;
301 break;
302 case 2:
303 pr->bcnt = 2;
304 break;
305 case 8:
306 pr->bcnt = 8;
307 break;
308 default:
309 p1[1] = '\0';
310 badcnt(p1);
311 }
312 break;
313 case 'e': case 'E': case 'f': case 'g': case 'G':
314 pr->flags = F_DBL;
315 switch(fu->bcnt) {
316 case 0: case 8:
317 pr->bcnt = 8;
318 break;
319 case 4:
320 pr->bcnt = 4;
321 break;
322 default:
323 p1[1] = '\0';
324 badcnt(p1);
325 }
326 break;
327 case 's':
328 pr->flags = F_STR;
329 switch(sokay) {
330 case NOTOKAY:
331 badsfmt();
332 case USEBCNT:
333 pr->bcnt = fu->bcnt;
334 break;
335 case USEPREC:
336 pr->bcnt = prec;
337 break;
338 }
339 break;
340 case '_':
341 ++p2;
342 switch(p1[1]) {
343 case 'A':
344 endfu = fu;
345 fu->flags |= F_IGNORE;
346 /* FALLTHROUGH */
347 case 'a':
348 pr->flags = F_ADDRESS;
349 ++p2;
350 switch(p1[2]) {
351 case 'd': case 'o': case'x':
352 cs[0] = 'l';
353 cs[1] = 'l';
354 cs[2] = p1[2];
355 cs[3] = '\0';
356 break;
357 default:
358 if (p1[2])
359 p1[3] = '\0';
360 badconv(p1);
361 }
362 break;
363 case 'c':
364 case 'p':
365 case 'u':
366 if (p1[1] == 'c') {
367 pr->flags = F_C;
368 /* cs[0] = 'c'; set in conv_c */
369 } else if (p1[1] == 'p') {
370 pr->flags = F_P;
371 cs[0] = 'c';
372 } else {
373 pr->flags = F_U;
374 /* cs[0] = 'c'; set in conv_u */
375 }
376
377 switch(fu->bcnt) {
378 case 0: case 1:
379 pr->bcnt = 1;
380 break;
381 default:
382 p1[2] = '\0';
383 badcnt(p1);
384 }
385 break;
386 default:
387 if (p1[1])
388 p1[2] = '\0';
389 badconv(p1);
390 }
391 break;
392 default:
393 if (cs[0])
394 p1[1] = '\0';
395 badconv(p1);
396 }
397
398 /*
399 * Copy to PR format string, set conversion character
400 * pointer, update original.
401 */
402 savech = *p2;
403 p1[0] = '\0';
404 if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1)
405 err(1, NULL);
406 *p2 = savech;
407 pr->cchar = pr->fmt + (p1 - fmtp);
408 fmtp = p2;
409
410 /* Only one conversion character if byte count. */
411 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
412 errx(1,
413 "byte count with multiple conversion characters");
414 }
415 /*
416 * If format unit byte count not specified, figure it out
417 * so can adjust rep count later.
418 */
419 if (!fu->bcnt)
420 for (pr = fu->nextpr; pr; pr = pr->nextpr)
421 fu->bcnt += pr->bcnt;
422 }
423 /*
424 * If the format string interprets any data at all, and it's
425 * not the same as the blocksize, and its last format unit
426 * interprets any data at all, and has no iteration count,
427 * repeat it as necessary.
428 *
429 * If, rep count is greater than 1, no trailing whitespace
430 * gets output from the last iteration of the format unit.
431 */
432 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
433 if (!fu->nextfu && fs->bcnt < blocksize &&
434 !(fu->flags&F_SETREP) && fu->bcnt)
435 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
436 if (fu->reps > 1) {
437 if (!fu->nextpr)
438 break;
439 for (pr = fu->nextpr;; pr = pr->nextpr)
440 if (!pr->nextpr)
441 break;
442 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
443 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
444 if (p2)
445 pr->nospace = p2;
446 }
447 }
448 #ifdef DEBUG
449 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
450 (void)printf("fmt:");
451 for (pr = fu->nextpr; pr; pr = pr->nextpr)
452 (void)printf(" {%s}", pr->fmt);
453 (void)printf("\n");
454 }
455 #endif
456 }
457
/*
 * escape --
 *	Decode backslash escape sequences in the string `p1', in place.
 *	\a \b \f \n \r \t \v become the matching control character; a
 *	backslash followed by any other character yields that character.
 *	A lone backslash at the very end of the string is kept as is.
 *	The result is never longer than the input.
 */
static void
escape(char *p1)
{
	const char *src;
	char *dst;
	char ch;

	src = dst = p1;
	while ((ch = *src++) != '\0') {
		if (ch != '\\') {
			/* ordinary character, copy through */
			*dst++ = ch;
			continue;
		}

		/* look at the character following the backslash */
		ch = *src++;
		switch (ch) {
		case '\0':
			/* incomplete escape sequence */
			*dst++ = '\\';
			*dst = '\0';
			return;
		case 'a':
			ch = '\007';	/* same value as '\a' */
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':
			ch = '\v';
			break;
		default:
			/* unknown escape: keep the character itself */
			break;
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
505
506 static __dead void
badcnt(char * s)507 badcnt(char *s)
508 {
509 errx(1, "%s: bad byte count", s);
510 }
511
512 static __dead void
badsfmt(void)513 badsfmt(void)
514 {
515 errx(1, "%%s: requires a precision or a byte count");
516 }
517
518 static __dead void
badfmt(const char * fmt)519 badfmt(const char *fmt)
520 {
521 errx(1, "\"%s\": bad format", fmt);
522 }
523
524 static __dead void
badconv(char * ch)525 badconv(char *ch)
526 {
527 errx(1, "%%%s: bad conversion character", ch);
528 }
529