1 /*
2 **	find file types by using a modified "magic" file
3 **
4 **	based on file v3.22 by Ian F. Darwin (see below)
5 **
6 **	Modified for mkhybrid James Pearson 19/5/98
7 */
8 
9 /*
10  * apprentice - make one pass through /etc/magic, learning its secrets.
11  *
12  * Copyright (c) Ian F. Darwin, 1987.
13  * Written by Ian F. Darwin.
14  *
15  * This software is not subject to any license of the American Telephone
16  * and Telegraph Company or of the Regents of the University of California.
17  *
18  * Permission is granted to anyone to use this software for any purpose on
19  * any computer system, and to alter it and redistribute it freely, subject
20  * to the following restrictions:
21  *
22  * 1. The author is not responsible for the consequences of use of this
23  *    software, no matter how awful, even if they arise from flaws in it.
24  *
25  * 2. The origin of this software must not be misrepresented, either by
26  *    explicit claim or by omission.  Since few users ever read sources,
27  *    credits must appear in the documentation.
28  *
29  * 3. Altered versions must be plainly marked as such, and must not be
30  *    misrepresented as being the original software.  Since few users
31  *    ever read sources, credits must appear in the documentation.
32  *
33  * 4. This notice may not be removed or altered.
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include "file.h"
42 
/*
 * EATAB - advance the scan pointer past a run of ASCII white space.
 * Expects a character pointer named `l' in the enclosing scope.
 *
 * The body is wrapped in do { } while (0) so that "EATAB;" is exactly one
 * statement and stays correct inside an unbraced if/else.  The "< 0x80"
 * test is the ISO C spelling of isascii() for an unsigned char value.
 */
#define	EATAB	do { \
			while ((unsigned char) *l < 0x80 && \
			       isspace((unsigned char) *l)) \
				++l; \
		} while (0)

/*
 * LOWCASE - the lower-case form of an upper-case character; every other
 * value is returned unchanged.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
47 
48 
49 static int getvalue	__P((struct magic *, char **));
50 static int hextoint	__P((int));
51 static char *getstr	__P((char *, char *, int, int *));
52 static int parse	__P((char *, int *, int));
53 static void eatsize	__P((char **));
54 
55 static int maxmagic = 0;
56 
57 static int apprentice_1	__P((char *, int));
58 
59 /*
60  * init_magic - read magic file and set up mapping
61  * based on the original apprentice()
62  */
63 int
init_magic(fn)64 init_magic(fn)
65 char *fn;			/* list of magic files */
66 {
67         maxmagic = MAXMAGIS;
68 	magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
69 	if (magic == NULL)
70 		return -1;
71 
72 	return(apprentice_1(fn, 0));
73 }
74 
75 static int
apprentice_1(fn,check)76 apprentice_1(fn, check)
77 char *fn;			/* name of magic file */
78 int check;			/* non-zero? checking-only run. */
79 {
80 	static const char hdr[] =
81 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
82 	FILE *f;
83 	char line[BUFSIZ+1];
84 	int errs = 0;
85 
86 	f = fopen(fn, "r");
87 	if (f==NULL) {
88 		return -1;
89 	}
90 
91 	/* parse it */
92 	if (check)	/* print silly verbose header for USG compat. */
93 		(void) printf("%s\n", hdr);
94 
95 	for (lineno = 1;fgets(line, sizeof(line), f) != NULL; lineno++) {
96 		if (line[0]=='#')	/* comment, do not parse */
97 			continue;
98 		/* delete newline */
99 		line[strcspn(line, "\n")] = '\0';
100 		if (line[0] == '\0')
101 			continue;
102 		if (parse(line, &nmagic, check) != 0)
103 			errs = 1;
104 	}
105 
106 	(void) fclose(f);
107 	return errs;
108 }
109 
110 /*
111  * extend the sign bit if the comparison is to be signed
112  */
113 uint32
signextend(m,v)114 signextend(m, v)
115 struct magic *m;
116 uint32 v;
117 {
118 	if (!(m->flag & UNSIGNED))
119 		switch(m->type) {
120 		/*
121 		 * Do not remove the casts below.  They are
122 		 * vital.  When later compared with the data,
123 		 * the sign extension must have happened.
124 		 */
125 		case BYTE:
126 			v = (char) v;
127 			break;
128 		case SHORT:
129 		case BESHORT:
130 		case LESHORT:
131 			v = (short) v;
132 			break;
133 		case DATE:
134 		case BEDATE:
135 		case LEDATE:
136 		case LONG:
137 		case BELONG:
138 		case LELONG:
139 			v = (int32) v;
140 			break;
141 		case STRING:
142 			break;
143 		default:
144 			return -1;
145 		}
146 	return v;
147 }
148 
149 /*
150  * parse one line from magic file, put into magic[index++] if valid
151  */
152 static int
parse(l,ndx,check)153 parse(l, ndx, check)
154 char *l;
155 int *ndx, check;
156 {
157 	int i = 0, nd = *ndx;
158 	struct magic *m;
159 	char *t, *s;
160 
161 #define ALLOC_INCR	20
162 	if (nd+1 >= maxmagic){
163 	    maxmagic += ALLOC_INCR;
164 	    if ((magic = (struct magic *) realloc(magic,
165 						  sizeof(struct magic) *
166 						  maxmagic)) == NULL) {
167 		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
168 		if (check)
169 			return -1;
170 		else
171 			exit(1);
172 	    }
173 	    memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
174 	}
175 	m = &magic[*ndx];
176 	m->flag = 0;
177 	m->cont_level = 0;
178 
179 	while (*l == '>') {
180 		++l;		/* step over */
181 		m->cont_level++;
182 	}
183 
184 	if (m->cont_level != 0 && *l == '(') {
185 		++l;		/* step over */
186 		m->flag |= INDIR;
187 	}
188 	if (m->cont_level != 0 && *l == '&') {
189                 ++l;            /* step over */
190                 m->flag |= ADD;
191         }
192 
193 	/* get offset, then skip over it */
194 	m->offset = (int) strtoul(l,&t,0);
195 /*
196         if (l == t)
197 		magwarn("offset %s invalid", l);
198 */
199         l = t;
200 
201 	if (m->flag & INDIR) {
202 		m->in.type = LONG;
203 		m->in.offset = 0;
204 		/*
205 		 * read [.lbs][+-]nnnnn)
206 		 */
207 		if (*l == '.') {
208 			l++;
209 			switch (LOWCASE(*l)) {
210 			case 'l':
211 				m->in.type = LONG;
212 				break;
213 			case 'h':
214 			case 's':
215 				m->in.type = SHORT;
216 				break;
217 			case 'c':
218 			case 'b':
219 				m->in.type = BYTE;
220 				break;
221 			default:
222 				break;
223 			}
224 			l++;
225 		}
226 		s = l;
227 		if (*l == '+' || *l == '-') l++;
228 		if (isdigit((unsigned char)*l)) {
229 			m->in.offset = strtoul(l, &t, 0);
230 			if (*s == '-') m->in.offset = - m->in.offset;
231 		}
232 		else
233 			t = l;
234 /*
235 		if (*t++ != ')')
236 			magwarn("missing ')' in indirect offset");
237 */
238 		l = t;
239 	}
240 
241 
242 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
243 		++l;
244 	EATAB;
245 
246 #define NBYTE		4
247 #define NSHORT		5
248 #define NLONG		4
249 #define NSTRING 	6
250 #define NDATE		4
251 #define NBESHORT	7
252 #define NBELONG		6
253 #define NBEDATE		6
254 #define NLESHORT	7
255 #define NLELONG		6
256 #define NLEDATE		6
257 
258 	if (*l == 'u') {
259 		++l;
260 		m->flag |= UNSIGNED;
261 	}
262 
263 	/* get type, skip it */
264 	if (strncmp(l, "byte", NBYTE)==0) {
265 		m->type = BYTE;
266 		l += NBYTE;
267 	} else if (strncmp(l, "short", NSHORT)==0) {
268 		m->type = SHORT;
269 		l += NSHORT;
270 	} else if (strncmp(l, "long", NLONG)==0) {
271 		m->type = LONG;
272 		l += NLONG;
273 	} else if (strncmp(l, "string", NSTRING)==0) {
274 		m->type = STRING;
275 		l += NSTRING;
276 	} else if (strncmp(l, "date", NDATE)==0) {
277 		m->type = DATE;
278 		l += NDATE;
279 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
280 		m->type = BESHORT;
281 		l += NBESHORT;
282 	} else if (strncmp(l, "belong", NBELONG)==0) {
283 		m->type = BELONG;
284 		l += NBELONG;
285 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
286 		m->type = BEDATE;
287 		l += NBEDATE;
288 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
289 		m->type = LESHORT;
290 		l += NLESHORT;
291 	} else if (strncmp(l, "lelong", NLELONG)==0) {
292 		m->type = LELONG;
293 		l += NLELONG;
294 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
295 		m->type = LEDATE;
296 		l += NLEDATE;
297 	} else {
298 		return -1;
299 	}
300 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
301 	if (*l == '&') {
302 		++l;
303 		m->mask = signextend(m, strtoul(l, &l, 0));
304 		eatsize(&l);
305 	} else
306 		m->mask = ~0L;
307 	EATAB;
308 
309 	switch (*l) {
310 	case '>':
311 	case '<':
312 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
313 	case '&':
314 	case '^':
315 	case '=':
316   		m->reln = *l;
317   		++l;
318 		break;
319 	case '!':
320 		if (m->type != STRING) {
321 			m->reln = *l;
322 			++l;
323 			break;
324 		}
325 		/* FALL THROUGH */
326 	default:
327 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
328 		    isspace((unsigned char)l[1])) {
329 			m->reln = *l;
330 			++l;
331 			goto GetDesc;	/* Bill The Cat */
332 		}
333   		m->reln = '=';
334 		break;
335 	}
336   	EATAB;
337 
338 	if (getvalue(m, &l))
339 		return -1;
340 	/*
341 	 * TODO finish this macro and start using it!
342 	 * #define offsetcheck {if (offset > HOWMANY-1)
343 	 *	magwarn("offset too big"); }
344 	 */
345 
346 	/*
347 	 * now get last part - the description
348 	 */
349 GetDesc:
350 	EATAB;
351 	if (l[0] == '\b') {
352 		++l;
353 		m->nospflag = 1;
354 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
355 		++l;
356 		++l;
357 		m->nospflag = 1;
358 	} else
359 		m->nospflag = 0;
360 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
361 		/* NULLBODY */;
362 
363 	++(*ndx);		/* make room for next */
364 	return 0;
365 }
366 
367 /*
368  * Read a numeric value from a pointer, into the value union of a magic
369  * pointer, according to the magic type.  Update the string pointer to point
370  * just after the number read.  Return 0 for success, non-zero for failure.
371  */
372 static int
getvalue(m,p)373 getvalue(m, p)
374 struct magic *m;
375 char **p;
376 {
377 	int slen;
378 
379 	if (m->type == STRING) {
380 		*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
381 		m->vallen = slen;
382 	} else
383 		if (m->reln != 'x') {
384 			m->value.l = signextend(m, strtoul(*p, p, 0));
385 			eatsize(p);
386 		}
387 	return 0;
388 }
389 
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * white-space character (which is consumed), at the end of the input, or
 * when the output buffer is full (a message is then written to stderr).
 *
 * s     text to convert
 * p     output buffer, receives the converted, NUL-terminated string
 * plen  size of the output buffer in bytes; must be at least 1
 * slen  out: number of bytes stored in p, not counting the terminator
 *
 * Recognised escapes: \n \r \b \t \f \v, \ followed by up to 3 octal
 * digits, \x followed by up to 2 hex digits ("\x" alone yields 'x');
 * any other escaped character stands for itself.
 *
 * Returns the updated scan pointer.  When the input ends (also right
 * after a lone trailing backslash) the pointer is left ON the terminating
 * NUL, never beyond it, so the caller can keep scanning safely.
 */
static char *
getstr(s, p, plen, slen)
register char	*s;
register char	*p;
int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		/* escape sequence: look at the character after the '\' */
		c = *s;
		if (c == '\0')
			break;		/* lone '\' at end of input */
		s++;

		switch (c) {
		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char) val;
			break;
		}
	}

	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
496 
497 
/*
 * Single hex char to int; -1 if not a hex char.
 *
 * c is compared as a whole int, so values outside the range of a
 * character (for example '0' + 256) are rejected instead of being
 * classified by their low byte.  Only ISO C is needed; no <ctype.h>.
 */
static int
hextoint(c)
int c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
509 
510 
/*
 * Print a string containing C character escapes.
 *
 * fp   stream to write to
 * s    bytes to print
 * len  number of bytes to print, or -1 to print up to (not including)
 *      the first NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are written
 * as \n \r \b \t \f \v; every other byte as a backslash followed by three
 * octal digits.
 *
 * The length is checked BEFORE a byte is fetched, so exactly len bytes of
 * s are read and s is not touched at all when len is 0.
 */
void
showstr(fp, s, len)
FILE *fp;
const char *s;
int len;
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else  {
			if (len-- == 0)
				break;
			c = *s++;
		}
		if(c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* mask to 8 bits so a negative char prints as 3 digits */
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
569 
/*
 * eatsize(): step over the size suffix that may follow a number
 * [eg. 10UL].
 *
 * An optional 'u' is skipped first, then at most one of the letters
 * l, s, h, b or c; both are matched without regard to case.  *p is
 * updated to point at the first character that was not consumed.
 */
static void
eatsize(p)
char **p;
{
	char *cur = *p;
	int letter;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional width letter: long, short (s/h) or char/byte (b/c) */
	letter = LOWCASE(*cur);
	if (letter == 'l' || letter == 's' || letter == 'h' ||
	    letter == 'b' || letter == 'c')
		cur++;

	*p = cur;
}
596