xref: /original-bsd/usr.bin/hexdump/parse.c (revision 088910ec)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)parse.c	5.7 (Berkeley) 07/14/92";
10 #endif /* not lint */
11 
12 #include <sys/types.h>
13 
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <ctype.h>
19 #include <string.h>
20 #include "hexdump.h"
21 
22 FU *endfu;					/* format unit holding a %_A conversion (assigned in rewrite()); meant for end-of-data */
23 
/*
 * addfile --
 *	Read format strings from the file `name`, one per line, and pass each
 *	one to add().  Leading white space is skipped; empty lines and lines
 *	whose first non-blank character is '#' are ignored.  A line that does
 *	not fit in the line buffer is reported on stderr and dropped.
 *
 *	If the file cannot be opened the failure is reported through err().
 */
void
addfile(name)
	char *name;
{
	register char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err("%s: %s\n", name, strerror(errno));
	while (fgets(buf, sizeof(buf), fp)) {
		if ((p = strchr(buf, '\n')) != NULL)
			*p = '\0';
		else if (strlen(buf) == sizeof(buf) - 1) {
			/*
			 * The buffer filled up before a newline showed up.
			 * Discard the remainder of the line from the format
			 * file itself (not from standard input).
			 */
			(void)fprintf(stderr, "hexdump: line too long.\n");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		/*
		 * Otherwise this is a short final line without a trailing
		 * newline; it is a complete format string, so use it.
		 */

		/* ctype functions need an unsigned char value. */
		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
49 
50 void
51 add(fmt)
52 	char *fmt;
53 {
54 	register char *p;
55 	static FS **nextfs;
56 	FS *tfs;
57 	FU *tfu, **nextfu;
58 	char *savep;
59 
60 	/* start new linked list of format units */
61 	tfs = emalloc(sizeof(FS));
62 	if (!fshead)
63 		fshead = tfs;
64 	else
65 		*nextfs = tfs;
66 	nextfs = &tfs->nextfs;
67 	nextfu = &tfs->nextfu;
68 
69 	/* take the format string and break it up into format units */
70 	for (p = fmt;;) {
71 		/* skip leading white space */
72 		for (; isspace(*p); ++p);
73 		if (!*p)
74 			break;
75 
76 		/* allocate a new format unit and link it in */
77 		tfu = emalloc(sizeof(FU));
78 		*nextfu = tfu;
79 		nextfu = &tfu->nextfu;
80 		tfu->reps = 1;
81 
82 		/* if leading digit, repetition count */
83 		if (isdigit(*p)) {
84 			for (savep = p; isdigit(*p); ++p);
85 			if (!isspace(*p) && *p != '/')
86 				badfmt(fmt);
87 			/* may overwrite either white space or slash */
88 			tfu->reps = atoi(savep);
89 			tfu->flags = F_SETREP;
90 			/* skip trailing white space */
91 			for (++p; isspace(*p); ++p);
92 		}
93 
94 		/* skip slash and trailing white space */
95 		if (*p == '/')
96 			while (isspace(*++p));
97 
98 		/* byte count */
99 		if (isdigit(*p)) {
100 			for (savep = p; isdigit(*p); ++p);
101 			if (!isspace(*p))
102 				badfmt(fmt);
103 			tfu->bcnt = atoi(savep);
104 			/* skip trailing white space */
105 			for (++p; isspace(*p); ++p);
106 		}
107 
108 		/* format */
109 		if (*p != '"')
110 			badfmt(fmt);
111 		for (savep = ++p; *p != '"';)
112 			if (*p++ == 0)
113 				badfmt(fmt);
114 		if (!(tfu->fmt = malloc(p - savep + 1)))
115 			nomem();
116 		(void) strncpy(tfu->fmt, savep, p - savep);
117 		tfu->fmt[p - savep] = '\0';
118 		escape(tfu->fmt);
119 		p++;
120 	}
121 }
122 
/*
 * Characters skipped between a '%' and its conversion character.  size() and
 * rewrite() scan with spec + 1 (everything but the leading '.') so the scan
 * stops at a precision; rewrite() scans with the full set when the format
 * unit has a byte count.
 */
123 static char *spec = ".#-+ 0123456789";
124 
125 int
126 size(fs)
127 	FS *fs;
128 {
129 	register FU *fu;
130 	register int bcnt, cursize;
131 	register char *fmt;
132 	int prec;
133 
134 	/* figure out the data block size needed for each format unit */
135 	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
136 		if (fu->bcnt) {
137 			cursize += fu->bcnt * fu->reps;
138 			continue;
139 		}
140 		for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
141 			if (*fmt != '%')
142 				continue;
143 			/*
144 			 * skip any special chars -- save precision in
145 			 * case it's a %s format.
146 			 */
147 			while (index(spec + 1, *++fmt));
148 			if (*fmt == '.' && isdigit(*++fmt)) {
149 				prec = atoi(fmt);
150 				while (isdigit(*++fmt));
151 			}
152 			switch(*fmt) {
153 			case 'c':
154 				bcnt += 1;
155 				break;
156 			case 'd': case 'i': case 'o': case 'u':
157 			case 'x': case 'X':
158 				bcnt += 4;
159 				break;
160 			case 'e': case 'E': case 'f': case 'g': case 'G':
161 				bcnt += 8;
162 				break;
163 			case 's':
164 				bcnt += prec;
165 				break;
166 			case '_':
167 				switch(*++fmt) {
168 				case 'c': case 'p': case 'u':
169 					bcnt += 1;
170 					break;
171 				}
172 			}
173 		}
174 		cursize += bcnt * fu->reps;
175 	}
176 	return (cursize);
177 }
178 
179 void
180 rewrite(fs)
181 	FS *fs;
182 {
183 	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
184 	register PR *pr, **nextpr;
185 	register FU *fu;
186 	register char *p1, *p2;
187 	char savech, *fmtp, cs[3];
188 	int nconv, prec;
189 
190 	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
191 		/*
192 		 * Break each format unit into print units; each conversion
193 		 * character gets its own.
194 		 */
195 		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
196 			pr = emalloc(sizeof(PR));
197 			if (!fu->nextpr)
198 				fu->nextpr = pr;
199 			else
200 				*nextpr = pr;
201 
202 			/* Skip preceding text and up to the next % sign. */
203 			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
204 
205 			/* Only text in the string. */
206 			if (!*p1) {
207 				pr->fmt = fmtp;
208 				pr->flags = F_TEXT;
209 				break;
210 			}
211 
212 			/*
213 			 * Get precision for %s -- if have a byte count, don't
214 			 * need it.
215 			 */
216 			if (fu->bcnt) {
217 				sokay = USEBCNT;
218 				/* Skip to conversion character. */
219 				for (++p1; index(spec, *p1); ++p1);
220 			} else {
221 				/* Skip any special chars, field width. */
222 				while (index(spec + 1, *++p1));
223 				if (*p1 == '.' && isdigit(*++p1)) {
224 					sokay = USEPREC;
225 					prec = atoi(p1);
226 					while (isdigit(*++p1));
227 				} else
228 					sokay = NOTOKAY;
229 			}
230 
231 			p2 = p1 + 1;		/* Set end pointer. */
232 			cs[0] = *p1;		/* Set conversion string. */
233 			cs[1] = '\0';
234 
235 			/*
236 			 * Figure out the byte count for each conversion;
237 			 * rewrite the format as necessary, set up blank-
238 			 * padding for end of data.
239 			 */
240 			switch(cs[0]) {
241 			case 'c':
242 				pr->flags = F_CHAR;
243 				switch(fu->bcnt) {
244 				case 0: case 1:
245 					pr->bcnt = 1;
246 					break;
247 				default:
248 					p1[1] = '\0';
249 					badcnt(p1);
250 				}
251 				break;
252 			case 'd': case 'i':
253 				pr->flags = F_INT;
254 				goto isint;
255 			case 'o': case 'u': case 'x': case 'X':
256 				pr->flags = F_UINT;
257 isint:				cs[2] = '\0';
258 				cs[1] = cs[0];
259 				cs[0] = 'q';
260 				switch(fu->bcnt) {
261 				case 0: case 4:
262 					pr->bcnt = 4;
263 					break;
264 				case 1:
265 					pr->bcnt = 1;
266 					break;
267 				case 2:
268 					pr->bcnt = 2;
269 					break;
270 				default:
271 					p1[1] = '\0';
272 					badcnt(p1);
273 				}
274 				break;
275 			case 'e': case 'E': case 'f': case 'g': case 'G':
276 				pr->flags = F_DBL;
277 				switch(fu->bcnt) {
278 				case 0: case 8:
279 					pr->bcnt = 8;
280 					break;
281 				case 4:
282 					pr->bcnt = 4;
283 					break;
284 				default:
285 					p1[1] = '\0';
286 					badcnt(p1);
287 				}
288 				break;
289 			case 's':
290 				pr->flags = F_STR;
291 				switch(sokay) {
292 				case NOTOKAY:
293 					badsfmt();
294 				case USEBCNT:
295 					pr->bcnt = fu->bcnt;
296 					break;
297 				case USEPREC:
298 					pr->bcnt = prec;
299 					break;
300 				}
301 				break;
302 			case '_':
303 				++p2;
304 				switch(p1[1]) {
305 				case 'A':
306 					endfu = fu;
307 					fu->flags |= F_IGNORE;
308 					/* FALLTHROUGH */
309 				case 'a':
310 					pr->flags = F_ADDRESS;
311 					++p2;
312 					switch(p1[2]) {
313 					case 'd': case 'o': case'x':
314 						cs[0] = 'q';
315 						cs[1] = p1[2];
316 						cs[2] = '\0';
317 						break;
318 					default:
319 						p1[3] = '\0';
320 						badconv(p1);
321 					}
322 					break;
323 				case 'c':
324 					pr->flags = F_C;
325 					/* cs[0] = 'c';	set in conv_c */
326 					goto isint2;
327 				case 'p':
328 					pr->flags = F_P;
329 					cs[0] = 'c';
330 					goto isint2;
331 				case 'u':
332 					pr->flags = F_U;
333 					/* cs[0] = 'c';	set in conv_u */
334 isint2:					switch(fu->bcnt) {
335 					case 0: case 1:
336 						pr->bcnt = 1;
337 						break;
338 					default:
339 						p1[2] = '\0';
340 						badcnt(p1);
341 					}
342 					break;
343 				default:
344 					p1[2] = '\0';
345 					badconv(p1);
346 				}
347 				break;
348 			default:
349 				p1[1] = '\0';
350 				badconv(p1);
351 			}
352 
353 			/*
354 			 * Copy to PR format string, set conversion character
355 			 * pointer, update original.
356 			 */
357 			savech = *p2;
358 			p1[0] = '\0';
359 			pr->fmt = emalloc(strlen(fmtp) + 2);
360 			(void)strcpy(pr->fmt, fmtp);
361 			(void)strcat(pr->fmt, cs);
362 			*p2 = savech;
363 			pr->cchar = pr->fmt + (p1 - fmtp);
364 			fmtp = p2;
365 
366 			/* Only one conversion character if byte count. */
367 			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
368 	    err("byte count with multiple conversion characters");
369 		}
370 		/*
371 		 * If format unit byte count not specified, figure it out
372 		 * so can adjust rep count later.
373 		 */
374 		if (!fu->bcnt)
375 			for (pr = fu->nextpr; pr; pr = pr->nextpr)
376 				fu->bcnt += pr->bcnt;
377 	}
378 	/*
379 	 * If the format string interprets any data at all, and it's
380 	 * not the same as the blocksize, and its last format unit
381 	 * interprets any data at all, and has no iteration count,
382 	 * repeat it as necessary.
383 	 *
384 	 * If, rep count is greater than 1, no trailing whitespace
385 	 * gets output from the last iteration of the format unit.
386 	 */
387 	for (fu = fs->nextfu;; fu = fu->nextfu) {
388 		if (!fu->nextfu && fs->bcnt < blocksize &&
389 		    !(fu->flags&F_SETREP) && fu->bcnt)
390 			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
391 		if (fu->reps > 1) {
392 			for (pr = fu->nextpr;; pr = pr->nextpr)
393 				if (!pr->nextpr)
394 					break;
395 			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
396 				p2 = isspace(*p1) ? p1 : NULL;
397 			if (p2)
398 				pr->nospace = p2;
399 		}
400 		if (!fu->nextfu)
401 			break;
402 	}
403 #ifdef DEBUG
404 	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
405 		(void)printf("fmt:");
406 		for (pr = fu->nextpr; pr; pr = pr->nextpr)
407 			(void)printf(" {%s}", pr->fmt);
408 		(void)printf("\n");
409 	}
410 #endif
411 }
412 
/*
 * escape --
 *	Translate backslash escape sequences in the string `p1` in place.
 *	\a \b \f \n \r \t and \v become the corresponding control character;
 *	a backslash followed by any other character yields that character.
 *	A backslash that is the last character of the string is kept as a
 *	literal backslash.  The result is never longer than the input.
 */
void
escape(p1)
	register char *p1;
{
	register char *p2;

	/*
	 * p1 reads, p2 writes.  Once an escape has been collapsed p2 trails
	 * p1, so ordinary characters have to be copied down as well.
	 */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			*p2 = *p1;
			if (!*p1)
				break;
			continue;
		}
		switch(*++p1) {
		case '\0':
			/* Lone trailing backslash: keep it and finish. */
			*p2 = '\\';
			*++p2 = '\0';
			return;
		case 'a':
		     /* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
}
455 
/*
 * badcnt --
 *	Report, through err(), that the byte count given for the conversion
 *	text `s` is not one the conversion supports.
 */
void
badcnt(s)
	char *s;
{

	err("%s: bad byte count", s);
}
462 
/*
 * badsfmt --
 *	Report, through err(), a %s conversion that has neither a precision
 *	nor a byte count.
 */
void
badsfmt()
{

	err("%%s: requires a precision or a byte count\n");
}
468 
/*
 * badfmt --
 *	Report, through err(), that the format string `fmt` could not be
 *	parsed.  The whole string is quoted in the message.
 */
void
badfmt(fmt)
	char *fmt;
{

	err("\"%s\": bad format\n", fmt);
}
475 
/*
 * badconv --
 *	Report, through err(), an unknown conversion.  `ch` is the conversion
 *	text without its leading '%'; the message puts the '%' back.
 */
void
badconv(ch)
	char *ch;
{

	err("%%%s: bad conversion character\n", ch);
}
482