1 /*
2 **	find file types by using a modified "magic" file
3 **
4 **	based on file v3.22 by Ian F. Darwin (see below)
5 **
6 **	Modified for mkhybrid James Pearson 19/5/98
7 */
8 
9 /*
10  * apprentice - make one pass through /etc/magic, learning its secrets.
11  *
12  * Copyright (c) Ian F. Darwin, 1987.
13  * Written by Ian F. Darwin.
14  *
15  * This software is not subject to any license of the American Telephone
16  * and Telegraph Company or of the Regents of the University of California.
17  *
18  * Permission is granted to anyone to use this software for any purpose on
19  * any computer system, and to alter it and redistribute it freely, subject
20  * to the following restrictions:
21  *
22  * 1. The author is not responsible for the consequences of use of this
23  *    software, no matter how awful, even if they arise from flaws in it.
24  *
25  * 2. The origin of this software must not be misrepresented, either by
26  *    explicit claim or by omission.  Since few users ever read sources,
27  *    credits must appear in the documentation.
28  *
29  * 3. Altered versions must be plainly marked as such, and must not be
30  *    misrepresented as being the original software.  Since few users
31  *    ever read sources, credits must appear in the documentation.
32  *
33  * 4. This notice may not be removed or altered.
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include "file.h"
42 
#ifndef	lint
/* Version-control identification string ($Id$ keyword); left out under lint. */
static char *moduleid =
	"@(#)$Id: apprentice.c,v 1.1 2000/10/10 20:40:36 beck Exp $";
#endif	/* lint */

/*
 * EATAB - advance the scan pointer past any run of ASCII whitespace.
 * The macro expands to a braced block that operates on a variable named
 * `l', so it can only be used where such a pointer is in scope.
 */
#define	EATAB {while (isascii((unsigned char) *l) && \
		      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE - lower-case form of an upper-case character; any other
 * character is yielded unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))


/* Forward declarations of the file-local helpers defined further down. */
static int getvalue	__P((struct magic *, char **));
static int hextoint	__P((int));
static char *getstr	__P((char *, char *, int, int *));
static int parse	__P((char *, int *, int));
static void eatsize	__P((char **));

/* Number of entries the magic[] table currently has room for. */
static int maxmagic = 0;

static int apprentice_1	__P((char *, int));
63 
64 /*
65  * init_magic - read magic file and set up mapping
66  * based on the original apprentice()
67  */
68 int
69 init_magic(fn)
70 char *fn;			/* list of magic files */
71 {
72         maxmagic = MAXMAGIS;
73 	magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
74 	if (magic == NULL)
75 		return -1;
76 
77 	return(apprentice_1(fn, 0));
78 }
79 
80 static int
81 apprentice_1(fn, check)
82 char *fn;			/* name of magic file */
83 int check;			/* non-zero? checking-only run. */
84 {
85 	static const char hdr[] =
86 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
87 	FILE *f;
88 	char line[BUFSIZ+1];
89 	int errs = 0;
90 
91 	f = fopen(fn, "r");
92 	if (f==NULL) {
93 		return -1;
94 	}
95 
96 	/* parse it */
97 	if (check)	/* print silly verbose header for USG compat. */
98 		(void) printf("%s\n", hdr);
99 
100 	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
101 		if (line[0]=='#')	/* comment, do not parse */
102 			continue;
103 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
104 			continue;
105 		line[strlen(line)-1] = '\0'; /* delete newline */
106 		if (parse(line, &nmagic, check) != 0)
107 			errs = 1;
108 	}
109 
110 	(void) fclose(f);
111 	return errs;
112 }
113 
114 /*
115  * extend the sign bit if the comparison is to be signed
116  */
117 uint32
118 signextend(m, v)
119 struct magic *m;
120 uint32 v;
121 {
122 	if (!(m->flag & UNSIGNED))
123 		switch(m->type) {
124 		/*
125 		 * Do not remove the casts below.  They are
126 		 * vital.  When later compared with the data,
127 		 * the sign extension must have happened.
128 		 */
129 		case BYTE:
130 			v = (char) v;
131 			break;
132 		case SHORT:
133 		case BESHORT:
134 		case LESHORT:
135 			v = (short) v;
136 			break;
137 		case DATE:
138 		case BEDATE:
139 		case LEDATE:
140 		case LONG:
141 		case BELONG:
142 		case LELONG:
143 			v = (int32) v;
144 			break;
145 		case STRING:
146 			break;
147 		default:
148 			return -1;
149 		}
150 	return v;
151 }
152 
153 /*
154  * parse one line from magic file, put into magic[index++] if valid
155  */
156 static int
157 parse(l, ndx, check)
158 char *l;
159 int *ndx, check;
160 {
161 	int i = 0, nd = *ndx;
162 	struct magic *m;
163 	char *t, *s;
164 
165 #define ALLOC_INCR	20
166 	if (nd+1 >= maxmagic){
167 	    maxmagic += ALLOC_INCR;
168 	    if ((magic = (struct magic *) realloc(magic,
169 						  sizeof(struct magic) *
170 						  maxmagic)) == NULL) {
171 		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
172 		if (check)
173 			return -1;
174 		else
175 			exit(1);
176 	    }
177 	    memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
178 	}
179 	m = &magic[*ndx];
180 	m->flag = 0;
181 	m->cont_level = 0;
182 
183 	while (*l == '>') {
184 		++l;		/* step over */
185 		m->cont_level++;
186 	}
187 
188 	if (m->cont_level != 0 && *l == '(') {
189 		++l;		/* step over */
190 		m->flag |= INDIR;
191 	}
192 	if (m->cont_level != 0 && *l == '&') {
193                 ++l;            /* step over */
194                 m->flag |= ADD;
195         }
196 
197 	/* get offset, then skip over it */
198 	m->offset = (int) strtoul(l,&t,0);
199 /*
200         if (l == t)
201 		magwarn("offset %s invalid", l);
202 */
203         l = t;
204 
205 	if (m->flag & INDIR) {
206 		m->in.type = LONG;
207 		m->in.offset = 0;
208 		/*
209 		 * read [.lbs][+-]nnnnn)
210 		 */
211 		if (*l == '.') {
212 			l++;
213 			switch (LOWCASE(*l)) {
214 			case 'l':
215 				m->in.type = LONG;
216 				break;
217 			case 'h':
218 			case 's':
219 				m->in.type = SHORT;
220 				break;
221 			case 'c':
222 			case 'b':
223 				m->in.type = BYTE;
224 				break;
225 			default:
226 				break;
227 			}
228 			l++;
229 		}
230 		s = l;
231 		if (*l == '+' || *l == '-') l++;
232 		if (isdigit((unsigned char)*l)) {
233 			m->in.offset = strtoul(l, &t, 0);
234 			if (*s == '-') m->in.offset = - m->in.offset;
235 		}
236 		else
237 			t = l;
238 /*
239 		if (*t++ != ')')
240 			magwarn("missing ')' in indirect offset");
241 */
242 		l = t;
243 	}
244 
245 
246 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
247 		++l;
248 	EATAB;
249 
250 #define NBYTE		4
251 #define NSHORT		5
252 #define NLONG		4
253 #define NSTRING 	6
254 #define NDATE		4
255 #define NBESHORT	7
256 #define NBELONG		6
257 #define NBEDATE		6
258 #define NLESHORT	7
259 #define NLELONG		6
260 #define NLEDATE		6
261 
262 	if (*l == 'u') {
263 		++l;
264 		m->flag |= UNSIGNED;
265 	}
266 
267 	/* get type, skip it */
268 	if (strncmp(l, "byte", NBYTE)==0) {
269 		m->type = BYTE;
270 		l += NBYTE;
271 	} else if (strncmp(l, "short", NSHORT)==0) {
272 		m->type = SHORT;
273 		l += NSHORT;
274 	} else if (strncmp(l, "long", NLONG)==0) {
275 		m->type = LONG;
276 		l += NLONG;
277 	} else if (strncmp(l, "string", NSTRING)==0) {
278 		m->type = STRING;
279 		l += NSTRING;
280 	} else if (strncmp(l, "date", NDATE)==0) {
281 		m->type = DATE;
282 		l += NDATE;
283 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
284 		m->type = BESHORT;
285 		l += NBESHORT;
286 	} else if (strncmp(l, "belong", NBELONG)==0) {
287 		m->type = BELONG;
288 		l += NBELONG;
289 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
290 		m->type = BEDATE;
291 		l += NBEDATE;
292 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
293 		m->type = LESHORT;
294 		l += NLESHORT;
295 	} else if (strncmp(l, "lelong", NLELONG)==0) {
296 		m->type = LELONG;
297 		l += NLELONG;
298 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
299 		m->type = LEDATE;
300 		l += NLEDATE;
301 	} else {
302 		return -1;
303 	}
304 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
305 	if (*l == '&') {
306 		++l;
307 		m->mask = signextend(m, strtoul(l, &l, 0));
308 		eatsize(&l);
309 	} else
310 		m->mask = ~0L;
311 	EATAB;
312 
313 	switch (*l) {
314 	case '>':
315 	case '<':
316 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
317 	case '&':
318 	case '^':
319 	case '=':
320   		m->reln = *l;
321   		++l;
322 		break;
323 	case '!':
324 		if (m->type != STRING) {
325 			m->reln = *l;
326 			++l;
327 			break;
328 		}
329 		/* FALL THROUGH */
330 	default:
331 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
332 		    isspace((unsigned char)l[1])) {
333 			m->reln = *l;
334 			++l;
335 			goto GetDesc;	/* Bill The Cat */
336 		}
337   		m->reln = '=';
338 		break;
339 	}
340   	EATAB;
341 
342 	if (getvalue(m, &l))
343 		return -1;
344 	/*
345 	 * TODO finish this macro and start using it!
346 	 * #define offsetcheck {if (offset > HOWMANY-1)
347 	 *	magwarn("offset too big"); }
348 	 */
349 
350 	/*
351 	 * now get last part - the description
352 	 */
353 GetDesc:
354 	EATAB;
355 	if (l[0] == '\b') {
356 		++l;
357 		m->nospflag = 1;
358 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
359 		++l;
360 		++l;
361 		m->nospflag = 1;
362 	} else
363 		m->nospflag = 0;
364 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
365 		/* NULLBODY */;
366 
367 	++(*ndx);		/* make room for next */
368 	return 0;
369 }
370 
371 /*
372  * Read a numeric value from a pointer, into the value union of a magic
373  * pointer, according to the magic type.  Update the string pointer to point
374  * just after the number read.  Return 0 for success, non-zero for failure.
375  */
376 static int
377 getvalue(m, p)
378 struct magic *m;
379 char **p;
380 {
381 	int slen;
382 
383 	if (m->type == STRING) {
384 		*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
385 		m->vallen = slen;
386 	} else
387 		if (m->reln != 'x') {
388 			m->value.l = signextend(m, strtoul(*p, p, 0));
389 			eatsize(p);
390 		}
391 	return 0;
392 }
393 
/*
 * hextoint - value of a single hexadecimal digit, or -1 if c is not one.
 *
 * Plain range checks are used so that the value tested and the value
 * converted are always the same, whatever integer is passed in.
 */
static int
hextoint(c)
int c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}


/*
 * getstr - convert a string containing C character escapes.
 *
 * s     text to scan; conversion stops at an unescaped whitespace
 *       character (which is consumed) or at the end of the text
 * p     destination buffer for the converted bytes
 * plen  size of the destination buffer, terminator included
 * slen  receives the number of converted bytes, terminator excluded
 *
 * Recognised escapes are \n \r \b \t \f \v, \ooo (up to three octal
 * digits) and \xhh (up to two hex digits); a backslash before any other
 * character yields that character.  Input that does not fit the buffer
 * is reported on stderr and the conversion stops there.
 *
 * Returns the updated scan pointer.  When the text ran out, the pointer
 * is left ON the terminating NUL, never beyond it, so the caller can keep
 * scanning safely.
 */
static char *
getstr(s, p, plen, slen)
register char	*s;
register char	*p;
int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;	/* last slot, kept for the NUL */
	register int	c;
	register int	val;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if(c == '\\') {
			switch(c = *s++) {

			case '\0':
				/* lone backslash at the end of the text */
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if(c >= '0' && c <= '7') {
					val = (val<<3) | (c - '0');
					c = *s++;  /* try for 3 */
					if(c >= '0' && c <= '7')
						val = (val<<3) | (c-'0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	/*
	 * Every way of getting here leaves the last character fetched in c.
	 * If that was the terminator, the post-increment has already moved
	 * s beyond it; step back so the result stays inside the string.
	 */
	if (c == '\0')
		--s;
	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
513 
514 
/*
 * showstr - print a string, writing unprintable bytes as C escapes.
 *
 * fp   stream to write to
 * s    bytes to print
 * len  number of bytes to print, or -1 to print up to the terminating NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are written
 * as \n \r \b \t \f \v; every other byte as a backslash followed by three
 * octal digits.
 */
void
showstr(fp, s, len)
FILE *fp;
const char *s;
int len;
{
	register char	c;
	int	counted = (len != -1);

	for (;;) {
		if (counted) {
			/*
			 * Check the remaining count BEFORE fetching, so no
			 * byte beyond the len bytes supplied is ever read.
			 */
			if (len-- == 0)
				break;
			c = *s++;
		} else {
			c = *s++;
			if (c == '\0')
				break;
		}
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				/* mask so a negative char prints as one byte */
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
573 
/*
 * eatsize - step over the size suffix that may follow a number [eg. 10UL].
 *
 * An optional 'u' is skipped first, then at most one of 'l' (long),
 * 's' or 'h' (short), 'b' or 'c' (char/byte); letter case is ignored.
 * *p is updated to point just past whatever was skipped.
 */
static void
eatsize(p)
char **p;
{
	char *cur = *p;

	if (LOWCASE(*cur) == 'u')
		cur++;

	/* the NUL test keeps strchr() from matching its own terminator */
	if (*cur != '\0' && strchr("lshbc", LOWCASE(*cur)) != NULL)
		cur++;

	*p = cur;
}
600