xref: /original-bsd/usr.bin/hexdump/parse.c (revision 9acaf688)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)parse.c	5.4 (Berkeley) 06/01/90";
10 #endif /* not lint */
11 
12 #include <sys/types.h>
13 #include <sys/file.h>
14 #include <stdio.h>
15 #include <ctype.h>
16 #include <string.h>
17 #include "hexdump.h"
18 
FU *endfu;					/* format at end-of-data; set by rewrite() to the unit holding a %_A conversion */
20 
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	one to add().  Leading white space is skipped; lines that are empty
 *	or whose first non-blank character is '#' are ignored.  A line that
 *	does not fit in the buffer is reported and skipped.  Exits with
 *	status 1 if the file cannot be opened.
 */
addfile(name)
	char *name;
{
	register char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if (!(fp = fopen(name, "r"))) {
		(void)fprintf(stderr, "hexdump: can't read %s.\n", name);
		exit(1);
	}
	while (fgets(buf, sizeof(buf), fp)) {
		if ((p = index(buf, '\n')) != NULL)
			*p = '\0';
		else if (!feof(fp)) {
			/*
			 * The buffer filled before a newline was seen.  Throw
			 * away the rest of the line, reading from the format
			 * file itself rather than from standard input.
			 */
			(void)fprintf(stderr, "hexdump: line too long.\n");
			while ((ch = getc(fp)) != '\n' && ch != EOF);
			continue;
		}
		/*
		 * Otherwise end-of-file was hit: this is the last line and it
		 * simply lacks a trailing newline, so use it as it stands.
		 */
		for (p = buf; *p && isspace(*p); ++p);
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
47 
48 add(fmt)
49 	char *fmt;
50 {
51 	register char *p;
52 	static FS **nextfs;
53 	FS *tfs;
54 	FU *tfu, **nextfu;
55 	char savech, *savep, *emalloc(), *strdup();
56 
57 	/* start new linked list of format units */
58 	/* NOSTRICT */
59 	tfs = (FS *)emalloc(sizeof(FS));
60 	if (!fshead)
61 		fshead = tfs;
62 	else
63 		*nextfs = tfs;
64 	nextfs = &tfs->nextfs;
65 	nextfu = &tfs->nextfu;
66 
67 	/* take the format string and break it up into format units */
68 	for (p = fmt;;) {
69 		/* skip leading white space */
70 		for (; isspace(*p); ++p);
71 		if (!*p)
72 			break;
73 
74 		/* allocate a new format unit and link it in */
75 		/* NOSTRICT */
76 		tfu = (FU *)emalloc(sizeof(FU));
77 		*nextfu = tfu;
78 		nextfu = &tfu->nextfu;
79 		tfu->reps = 1;
80 
81 		/* if leading digit, repetition count */
82 		if (isdigit(*p)) {
83 			for (savep = p; isdigit(*p); ++p);
84 			if (!isspace(*p) && *p != '/')
85 				badfmt(fmt);
86 			/* may overwrite either white space or slash */
87 			savech = *p;
88 			*p = '\0';
89 			tfu->reps = atoi(savep);
90 			tfu->flags = F_SETREP;
91 			*p = savech;
92 			/* skip trailing white space */
93 			for (++p; isspace(*p); ++p);
94 		}
95 
96 		/* skip slash and trailing white space */
97 		if (*p == '/')
98 			while (isspace(*++p));
99 
100 		/* byte count */
101 		if (isdigit(*p)) {
102 			for (savep = p; isdigit(*p); ++p);
103 			if (!isspace(*p))
104 				badfmt(fmt);
105 			savech = *p;
106 			*p = '\0';
107 			tfu->bcnt = atoi(savep);
108 			*p = savech;
109 			/* skip trailing white space */
110 			for (++p; isspace(*p); ++p);
111 		}
112 
113 		/* format */
114 		if (*p != '"')
115 			badfmt(fmt);
116 		for (savep = ++p; *p != '"'; ++p);
117 		if (*p != '"')
118 			badfmt(fmt);
119 		savech = *p;
120 		*p = '\0';
121 		if (!(tfu->fmt = strdup(savep)))
122 			nomem();
123 		escape(tfu->fmt);
124 		*p++ = savech;
125 	}
126 }
127 
128 static char *spec = ".#-+ 0123456789";
129 size(fs)
130 	FS *fs;
131 {
132 	register FU *fu;
133 	register int bcnt, cursize;
134 	register char *fmt;
135 	int prec;
136 
137 	/* figure out the data block size needed for each format unit */
138 	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
139 		if (fu->bcnt) {
140 			cursize += fu->bcnt * fu->reps;
141 			continue;
142 		}
143 		for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
144 			if (*fmt != '%')
145 				continue;
146 			/*
147 			 * skip any special chars -- save precision in
148 			 * case it's a %s format.
149 			 */
150 			while (index(spec + 1, *++fmt));
151 			if (*fmt == '.' && isdigit(*++fmt)) {
152 				prec = atoi(fmt);
153 				while (isdigit(*++fmt));
154 			}
155 			switch(*fmt) {
156 			case 'c':
157 				bcnt += 1;
158 				break;
159 			case 'd': case 'i': case 'o': case 'u':
160 			case 'x': case 'X':
161 				bcnt += 4;
162 				break;
163 			case 'e': case 'E': case 'f': case 'g': case 'G':
164 				bcnt += 8;
165 				break;
166 			case 's':
167 				bcnt += prec;
168 				break;
169 			case '_':
170 				switch(*++fmt) {
171 				case 'c': case 'p': case 'u':
172 					bcnt += 1;
173 					break;
174 				}
175 			}
176 		}
177 		cursize += bcnt * fu->reps;
178 	}
179 	return(cursize);
180 }
181 
182 rewrite(fs)
183 	FS *fs;
184 {
185 	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
186 	register PR *pr, **nextpr;
187 	register FU *fu;
188 	register char *p1, *p2;
189 	char savech, *fmtp;
190 	int nconv, prec;
191 
192 	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
193 		/*
194 		 * break each format unit into print units; each
195 		 * conversion character gets its own.
196 		 */
197 		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
198 			/* NOSTRICT */
199 			pr = (PR *)emalloc(sizeof(PR));
200 			if (!fu->nextpr)
201 				fu->nextpr = pr;
202 			else
203 				*nextpr = pr;
204 
205 			/* skip preceding text and up to the next % sign */
206 			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
207 
208 			/* only text in the string */
209 			if (!*p1) {
210 				pr->fmt = fmtp;
211 				pr->flags = F_TEXT;
212 				break;
213 			}
214 
215 			/*
216 			 * get precision for %s -- if have a byte count, don't
217 			 * need it.
218 			 */
219 			if (fu->bcnt) {
220 				sokay = USEBCNT;
221 				/* skip to conversion character */
222 				for (++p1; index(spec, *p1); ++p1);
223 			} else {
224 				/* skip any special chars, field width */
225 				while (index(spec + 1, *++p1));
226 				if (*p1 == '.' && isdigit(*++p1)) {
227 					sokay = USEPREC;
228 					prec = atoi(p1);
229 					while (isdigit(*++p1));
230 				}
231 				else
232 					sokay = NOTOKAY;
233 			}
234 
235 			p2 = p1 + 1;		/* set end pointer */
236 
237 			/*
238 			 * figure out the byte count for each conversion;
239 			 * rewrite the format as necessary, set up blank-
240 			 * padding for end of data.
241 			 */
242 			switch(*p1) {
243 			case 'c':
244 				pr->flags = F_CHAR;
245 				switch(fu->bcnt) {
246 				case 0: case 1:
247 					pr->bcnt = 1;
248 					break;
249 				default:
250 					p1[1] = '\0';
251 					badcnt(p1);
252 				}
253 				break;
254 			case 'd': case 'i':
255 				pr->flags = F_INT;
256 				goto sw1;
257 			case 'l':
258 				++p2;
259 				switch(p1[1]) {
260 				case 'd': case 'i':
261 					++p1;
262 					pr->flags = F_INT;
263 					goto sw1;
264 				case 'o': case 'u': case 'x': case 'X':
265 					++p1;
266 					pr->flags = F_UINT;
267 					goto sw1;
268 				default:
269 					p1[2] = '\0';
270 					badconv(p1);
271 				}
272 				/* NOTREACHED */
273 			case 'o': case 'u': case 'x': case 'X':
274 				pr->flags = F_UINT;
275 sw1:				switch(fu->bcnt) {
276 				case 0: case 4:
277 					pr->bcnt = 4;
278 					break;
279 				case 1:
280 					pr->bcnt = 1;
281 					break;
282 				case 2:
283 					pr->bcnt = 2;
284 					break;
285 				default:
286 					p1[1] = '\0';
287 					badcnt(p1);
288 				}
289 				break;
290 			case 'e': case 'E': case 'f': case 'g': case 'G':
291 				pr->flags = F_DBL;
292 				switch(fu->bcnt) {
293 				case 0: case 8:
294 					pr->bcnt = 8;
295 					break;
296 				case 4:
297 					pr->bcnt = 4;
298 					break;
299 				default:
300 					p1[1] = '\0';
301 					badcnt(p1);
302 				}
303 				break;
304 			case 's':
305 				pr->flags = F_STR;
306 				switch(sokay) {
307 				case NOTOKAY:
308 					badsfmt();
309 				case USEBCNT:
310 					pr->bcnt = fu->bcnt;
311 					break;
312 				case USEPREC:
313 					pr->bcnt = prec;
314 					break;
315 				}
316 				break;
317 			case '_':
318 				++p2;
319 				switch(p1[1]) {
320 				case 'A':
321 					endfu = fu;
322 					fu->flags |= F_IGNORE;
323 					/* FALLTHROUGH */
324 				case 'a':
325 					pr->flags = F_ADDRESS;
326 					++p2;
327 					switch(p1[2]) {
328 					case 'd': case 'o': case'x':
329 						*p1 = p1[2];
330 						break;
331 					default:
332 						p1[3] = '\0';
333 						badconv(p1);
334 					}
335 					break;
336 				case 'c':
337 					pr->flags = F_C;
338 					/* *p1 = 'c';	set in conv_c */
339 					goto sw2;
340 				case 'p':
341 					pr->flags = F_P;
342 					*p1 = 'c';
343 					goto sw2;
344 				case 'u':
345 					pr->flags = F_U;
346 					/* *p1 = 'c';	set in conv_u */
347 sw2:					switch(fu->bcnt) {
348 					case 0: case 1:
349 						pr->bcnt = 1;
350 						break;
351 					default:
352 						p1[2] = '\0';
353 						badcnt(p1);
354 					}
355 					break;
356 				default:
357 					p1[2] = '\0';
358 					badconv(p1);
359 				}
360 				break;
361 			default:
362 				p1[1] = '\0';
363 				badconv(p1);
364 			}
365 
366 			/*
367 			 * copy to PR format string, set conversion character
368 			 * pointer, update original.
369 			 */
370 			savech = *p2;
371 			p1[1] = '\0';
372 			if (!(pr->fmt = strdup(fmtp)))
373 				nomem();
374 			*p2 = savech;
375 			pr->cchar = pr->fmt + (p1 - fmtp);
376 			fmtp = p2;
377 
378 			/* only one conversion character if byte count */
379 			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) {
380 				(void)fprintf(stderr,
381 				    "hexdump: byte count with multiple conversion characters.\n");
382 				exit(1);
383 			}
384 		}
385 		/*
386 		 * if format unit byte count not specified, figure it out
387 		 * so can adjust rep count later.
388 		 */
389 		if (!fu->bcnt)
390 			for (pr = fu->nextpr; pr; pr = pr->nextpr)
391 				fu->bcnt += pr->bcnt;
392 	}
393 	/*
394 	 * if the format string interprets any data at all, and it's
395 	 * not the same as the blocksize, and its last format unit
396 	 * interprets any data at all, and has no iteration count,
397 	 * repeat it as necessary.
398 	 *
399 	 * if, rep count is greater than 1, no trailing whitespace
400 	 * gets output from the last iteration of the format unit.
401 	 */
402 	for (fu = fs->nextfu;; fu = fu->nextfu) {
403 		if (!fu->nextfu && fs->bcnt < blocksize &&
404 		    !(fu->flags&F_SETREP) && fu->bcnt)
405 			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
406 		if (fu->reps > 1) {
407 			for (pr = fu->nextpr;; pr = pr->nextpr)
408 				if (!pr->nextpr)
409 					break;
410 			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
411 				p2 = isspace(*p1) ? p1 : NULL;
412 			if (p2)
413 				pr->nospace = p2;
414 		}
415 		if (!fu->nextfu)
416 			break;
417 	}
418 }
419 
420 
/*
 * escape --
 *	Expand backslash escape sequences in the string p1, in place.  \a,
 *	\b, \f, \n, \r, \t and \v become the corresponding control
 *	character; a backslash followed by any other character yields that
 *	character; a backslash that is the last character of the string is
 *	kept as a literal backslash.  The result is never longer than the
 *	input.
 *
 *	The return type is the K&R default (int) spelled out; no value is
 *	returned and callers do not use one.
 */
int
escape(p1)
	register char *p1;
{
	register char *p2;

	/*
	 * alphabetic escape sequences have to be done in place
	 *
	 * p1 reads, p2 writes; p2 falls behind p1 by one for every escape
	 * sequence collapsed, so ordinary characters have to be copied down
	 * as well.
	 */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			/* ordinary character, or the terminator */
			if (!(*p2 = *p1))
				break;
			continue;
		}
		if (!p1[1]) {
			/* lone trailing backslash: keep it and stop */
			*p2++ = '\\';
			*p2 = '\0';
			break;
		}
		switch(*++p1) {
		case 'a':
		     /* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
}
462 
/*
 * badcnt --
 *	Complain on stderr that the byte count is not valid for the
 *	conversion named by s, then exit with status 1.
 */
badcnt(s)
	char *s;
{
	(void)fprintf(stderr, "hexdump: bad byte count for conversion character %s.\n", s);
	exit(1);
	/* NOTREACHED */
}
470 
/*
 * badsfmt --
 *	Complain on stderr that a %s conversion has neither a precision nor
 *	a byte count, then exit with status 1.
 */
badsfmt()
{
	(void)fprintf(stderr, "hexdump: %%s requires a precision or a byte count.\n");
	exit(1);
	/* NOTREACHED */
}
477 
/*
 * badfmt --
 *	Complain on stderr that format string fmt is malformed, echoing it
 *	inside braces, then exit with status 1.
 */
badfmt(fmt)
	char *fmt;
{
	(void)fprintf(stderr,
	    "hexdump: bad format {%s}\n",
	    fmt);
	exit(1);
	/* NOTREACHED */
}
484 
/*
 * badconv --
 *	Complain on stderr about the unsupported conversion ch (printed
 *	after a literal '%'), then exit with status 1.
 */
badconv(ch)
	char *ch;
{
	(void)fprintf(stderr,
	    "hexdump: bad conversion character %%%s.\n",
	    ch);
	exit(1);
	/* NOTREACHED */
}
491