1 /* @(#)apprentice.c	1.14 17/07/17 joerg */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)apprentice.c	1.14 17/07/17 joerg";
6 #endif
7 /*
8 **	find file types by using a modified "magic" file
9 **
10 **	based on file v3.22 by Ian F. Darwin (see below)
11 **
12 **	Modified for mkhybrid James Pearson 19/5/98
13 */
14 
15 /*
16  * apprentice - make one pass through /etc/magic, learning its secrets.
17  *
18  * Copyright (c) Ian F. Darwin, 1987.
19  * Written by Ian F. Darwin.
20  *
21  * This software is not subject to any export provision of the United States
22  * Department of Commerce, and may be exported to any country or planet.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice immediately at the beginning of the file, without modification,
29  *    this list of conditions, and the following disclaimer.
30  * 2. Redistributions in binary form must reproduce the above copyright
31  *    notice, this list of conditions and the following disclaimer in the
32  *    documentation and/or other materials provided with the distribution.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
38  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46 
47 #include <schily/stdio.h>
48 #include <schily/stdlib.h>
49 #include <schily/string.h>
50 #include <schily/ctype.h>
51 #include "file.h"
52 #include <schily/schily.h>
53 
54 #ifndef	lint
55 static UConst char moduleid[] =
56 	"@(#)$Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp $";
57 #endif	/* lint */
58 
59 int	__f_nmagic = 0;		/* number of valid magic[]s 		*/
60 #if	defined(IS_MACOS_X)
61 /*
62  * The MAC OS X linker does not grok "common" variables.
63  * Make __f_magic a "data" variable.
64  */
65 struct  magic *__f_magic = 0;	/* array of magic entries		*/
66 #else
67 struct  magic *__f_magic;	/* array of magic entries		*/
68 #endif
69 
/*
 * EATAB - advance the local scan pointer "l" past ASCII white space.
 * The body is wrapped in do { } while (0) so that "EATAB;" is exactly one
 * statement and remains correct inside an unbraced if/else.
 */
#define	EATAB	do { \
			while (isascii((unsigned char) *l) && \
			    isspace((unsigned char) *l)) \
				++l; \
		} while (0)

/*
 * LOWCASE - map an upper case character to lower case, leave anything else
 * unchanged.  The argument is evaluated more than once: no side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
74 
75 
76 static int getvalue	__PR((struct magic *, char **));
77 static int hextoint	__PR((int));
78 static char *apgetstr	__PR((char *, char *, int, int *));
79 static int parse	__PR((char *, int *, int));
80 static void eatsize	__PR((char **));
81 
82 static int maxmagic = 0;
83 
84 static int apprentice_1	__PR((char *, int));
85 
86 /*
87  * init_magic - read magic file and set up mapping
88  * based on the original apprentice()
89  */
90 int
init_magic(fn)91 init_magic(fn)
92 char *fn;			/* list of magic files */
93 {
94         maxmagic = MAXMAGIS;
95 	__f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
96 	if (__f_magic == NULL)
97 		return -1;
98 
99 	return(apprentice_1(fn, 0));
100 }
101 
102 static int
apprentice_1(fn,check)103 apprentice_1(fn, check)
104 char *fn;			/* name of magic file */
105 int check;			/* non-zero? checking-only run. */
106 {
107 	static const char hdr[] =
108 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
109 	FILE *f;
110 	char line[BUFSIZ+1];
111 	int errs = 0;
112 	int lineno;
113 
114 	f = fopen(fn, "r");
115 	if (f==NULL) {
116 		return -1;
117 	}
118 
119 	/* parse it */
120 	if (check)	/* print silly verbose header for USG compat. */
121 		(void) printf("%s\n", hdr);
122 
123 	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
124 		if (line[0]=='#')	/* comment, do not parse */
125 			continue;
126 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
127 			continue;
128 		line[strlen(line)-1] = '\0'; /* delete newline */
129 		if (parse(line, &__f_nmagic, check) != 0)
130 			errs = 1;
131 	}
132 
133 	(void) fclose(f);
134 	return errs;
135 }
136 
137 /*
138  * extend the sign bit if the comparison is to be signed
139  * XXX is uint32 really a good idea XXX JS
140  */
141 UInt32_t
signextend(m,v)142 signextend(m, v)
143 struct magic *m;
144 UInt32_t v;
145 {
146 	if (!(m->flag & UNSIGNED))
147 		switch(m->type) {
148 		/*
149 		 * Do not remove the casts below.  They are
150 		 * vital.  When later compared with the data,
151 		 * the sign extension must have happened.
152 		 */
153 		case BYTE:
154 			v = (char) v;
155 			break;
156 		case SHORT:
157 		case BESHORT:
158 		case LESHORT:
159 			v = (short) v;
160 			break;
161 		case DATE:
162 		case BEDATE:
163 		case LEDATE:
164 		case LONG:
165 		case BELONG:
166 		case LELONG:
167 			v = (Int32_t) v;
168 			break;
169 		case STRING:
170 			break;
171 		default:
172 			return -1;
173 		}
174 	return v;
175 }
176 
177 /*
178  * parse one line from magic file, put into magic[index++] if valid
179  */
180 static int
parse(l,ndx,check)181 parse(l, ndx, check)
182 char *l;
183 int *ndx, check;
184 {
185 	int i = 0, nd = *ndx;
186 	struct magic *m;
187 	char *t, *s;
188 	struct magic *__f_magic2;
189 
190 #define ALLOC_INCR	20
191 	if (nd+1 >= maxmagic){
192 	    maxmagic += ALLOC_INCR;
193 	    if ((__f_magic2 = (struct magic *) realloc(__f_magic,
194 						  sizeof(struct magic) *
195 						  maxmagic)) == NULL) {
196 		    maxmagic -= ALLOC_INCR;
197 #ifdef	MAIN
198 		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
199 #else
200 		(void) fprintf(stderr, "libfile: Out of memory.\n");
201 #endif
202 		if (check)
203 			return -1;
204 		else
205 			exit(1);
206 	    }
207 	    __f_magic = __f_magic2;
208 	    memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
209 	}
210 	m = &__f_magic[*ndx];
211 	m->flag = 0;
212 	m->cont_level = 0;
213 
214 	while (*l == '>') {
215 		++l;		/* step over */
216 		m->cont_level++;
217 	}
218 
219 	if (m->cont_level != 0 && *l == '(') {
220 		++l;		/* step over */
221 		m->flag |= INDIR;
222 	}
223 	if (m->cont_level != 0 && *l == '&') {
224                 ++l;            /* step over */
225                 m->flag |= ADD;
226         }
227 
228 	/* get offset, then skip over it */
229 	m->offset = (int) strtoul(l,&t,0);
230 /*
231         if (l == t)
232 		magwarn("offset %s invalid", l);
233 */
234         l = t;
235 
236 	if (m->flag & INDIR) {
237 		m->in.type = LONG;
238 		m->in.offset = 0;
239 		/*
240 		 * read [.lbs][+-]nnnnn)
241 		 */
242 		if (*l == '.') {
243 			l++;
244 			switch (LOWCASE(*l)) {
245 			case 'l':
246 				m->in.type = LONG;
247 				break;
248 			case 'h':
249 			case 's':
250 				m->in.type = SHORT;
251 				break;
252 			case 'c':
253 			case 'b':
254 				m->in.type = BYTE;
255 				break;
256 			default:
257 				break;
258 			}
259 			l++;
260 		}
261 		s = l;
262 		if (*l == '+' || *l == '-') l++;
263 		if (isdigit((unsigned char)*l)) {
264 			m->in.offset = strtoul(l, &t, 0);
265 			if (*s == '-') m->in.offset = - m->in.offset;
266 		}
267 		else
268 			t = l;
269 /*
270 		if (*t++ != ')')
271 			magwarn("missing ')' in indirect offset");
272 */
273 		l = t;
274 	}
275 
276 
277 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
278 		++l;
279 	EATAB;
280 
281 #define NBYTE		4
282 #define NSHORT		5
283 #define NLONG		4
284 #define NSTRING 	6
285 #define NDATE		4
286 #define NBESHORT	7
287 #define NBELONG		6
288 #define NBEDATE		6
289 #define NLESHORT	7
290 #define NLELONG		6
291 #define NLEDATE		6
292 
293 	if (*l == 'u') {
294 		++l;
295 		m->flag |= UNSIGNED;
296 	}
297 
298 	/* get type, skip it */
299 	if (strncmp(l, "byte", NBYTE)==0) {
300 		m->type = BYTE;
301 		l += NBYTE;
302 	} else if (strncmp(l, "short", NSHORT)==0) {
303 		m->type = SHORT;
304 		l += NSHORT;
305 	} else if (strncmp(l, "long", NLONG)==0) {
306 		m->type = LONG;
307 		l += NLONG;
308 	} else if (strncmp(l, "string", NSTRING)==0) {
309 		m->type = STRING;
310 		l += NSTRING;
311 	} else if (strncmp(l, "date", NDATE)==0) {
312 		m->type = DATE;
313 		l += NDATE;
314 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
315 		m->type = BESHORT;
316 		l += NBESHORT;
317 	} else if (strncmp(l, "belong", NBELONG)==0) {
318 		m->type = BELONG;
319 		l += NBELONG;
320 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
321 		m->type = BEDATE;
322 		l += NBEDATE;
323 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
324 		m->type = LESHORT;
325 		l += NLESHORT;
326 	} else if (strncmp(l, "lelong", NLELONG)==0) {
327 		m->type = LELONG;
328 		l += NLELONG;
329 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
330 		m->type = LEDATE;
331 		l += NLEDATE;
332 	} else {
333 		return -1;
334 	}
335 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
336 	if (*l == '&') {
337 		++l;
338 		m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */
339 		eatsize(&l);
340 	} else
341 		m->mask = ~0L;
342 	EATAB;
343 
344 	switch (*l) {
345 	case '>':
346 	case '<':
347 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
348 	case '&':
349 	case '^':
350 	case '=':
351   		m->reln = *l;
352   		++l;
353 		break;
354 	case '!':
355 		if (m->type != STRING) {
356 			m->reln = *l;
357 			++l;
358 			break;
359 		}
360 		/* FALL THROUGH */
361 	default:
362 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
363 		    isspace((unsigned char)l[1])) {
364 			m->reln = *l;
365 			++l;
366 			goto GetDesc;	/* Bill The Cat */
367 		}
368   		m->reln = '=';
369 		break;
370 	}
371   	EATAB;
372 
373 	if (getvalue(m, &l))
374 		return -1;
375 	/*
376 	 * TODO finish this macro and start using it!
377 	 * #define offsetcheck {if (offset > HOWMANY-1)
378 	 *	magwarn("offset too big"); }
379 	 */
380 
381 	/*
382 	 * now get last part - the description
383 	 */
384 GetDesc:
385 	EATAB;
386 	if (l[0] == '\b') {
387 		++l;
388 		m->nospflag = 1;
389 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
390 		++l;
391 		++l;
392 		m->nospflag = 1;
393 	} else
394 		m->nospflag = 0;
395 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
396 		/* NULLBODY */;
397 
398 	++(*ndx);		/* make room for next */
399 	return 0;
400 }
401 
402 /*
403  * Read a numeric value from a pointer, into the value union of a magic
404  * pointer, according to the magic type.  Update the string pointer to point
405  * just after the number read.  Return 0 for success, non-zero for failure.
406  */
407 static int
getvalue(m,p)408 getvalue(m, p)
409 struct magic *m;
410 char **p;
411 {
412 	int slen;
413 
414 	if (m->type == STRING) {
415 		*p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen);
416 		m->vallen = slen;
417 	} else
418 		if (m->reln != 'x') {
419 			m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */
420 			eatsize(p);
421 		}
422 	return 0;
423 }
424 
/*
 * hextoint - single hex char to int; -1 if not a hex char.
 *
 * Works on the full int value, so anything outside '0'-'9', 'a'-'f' and
 * 'A'-'F' (including negative char values) yields -1.
 */
static int
hextoint(c)
	int	c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * apgetstr - convert a string containing C character escapes.
 *
 * Scanning stops at an unescaped white space character, at the end of the
 * input, or when the output buffer is full (a message is then written to
 * stderr).
 *
 * s	input text
 * p	output buffer, receives the converted, NUL terminated string
 * plen	size of the output buffer in bytes
 * slen	receives the length of the converted string
 *
 * Returns the updated scan pointer: just behind the white space character
 * that ended the string, or pointing AT the terminating NUL if the input
 * ended first.  It never points beyond the terminator.
 */
static char *
apgetstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	/* look at each character before stepping over it */
	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		c = *s;
		if (c == '\0')		/* lone backslash at end of input */
			break;
		s++;

		switch (c) {

		default:		/* includes escaped blanks */
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char)val;
			break;
		}
	}
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
544 
545 
/*
 * showstr - print a string using C character escapes.
 *
 * fp	stream to write to
 * s	text to print
 * len	number of bytes to print, or -1 to print up to the terminating NUL
 *
 * Characters in the range 040..0176 are written as they are.  Newline,
 * return, backspace, tab, form feed and vertical tab are written as \n, \r,
 * \b, \t, \f and \v; everything else as a backslash followed by three octal
 * digits.
 *
 * In counted mode the remaining length is checked before the next byte is
 * fetched, so no byte beyond s[len-1] is ever read.
 */
void
showstr(fp, s, len)
	FILE		*fp;
	const char	*s;
	int		len;
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			c = *s++;
		}
		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
604 
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Skips an optional 'u' followed by an optional one-letter size suffix
 * (l, s, h, b or c), both in either case, and stores the advanced pointer
 * back through p.
 */
static void
eatsize(p)
	char	**p;
{
	char	*cur = *p;
	int	suffix;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional width: long, short (s/h) or char/byte (b/c) */
	suffix = LOWCASE(*cur);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		cur++;

	*p = cur;
}
631