1 /* @(#)apprentice.c 1.14 17/07/17 joerg */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)apprentice.c 1.14 17/07/17 joerg";
6 #endif
7 /*
8 ** find file types by using a modified "magic" file
9 **
10 ** based on file v3.22 by Ian F. Darwin (see below)
11 **
12 ** Modified for mkhybrid James Pearson 19/5/98
13 */
14
15 /*
16 * apprentice - make one pass through /etc/magic, learning its secrets.
17 *
18 * Copyright (c) Ian F. Darwin, 1987.
19 * Written by Ian F. Darwin.
20 *
21 * This software is not subject to any export provision of the United States
22 * Department of Commerce, and may be exported to any country or planet.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice immediately at the beginning of the file, without modification,
29 * this list of conditions, and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
38 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 */
46
47 #include <schily/stdio.h>
48 #include <schily/stdlib.h>
49 #include <schily/string.h>
50 #include <schily/ctype.h>
51 #include "file.h"
52 #include <schily/schily.h>
53
54 #ifndef lint
55 static UConst char moduleid[] =
56 "@(#)$Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp $";
57 #endif /* lint */
58
int __f_nmagic = 0; /* number of valid entries in __f_magic[] */
#if defined(IS_MACOS_X)
/*
 * The Mac OS X linker does not grok "common" variables.
 * Give __f_magic an explicit initializer to make it a "data" variable.
 */
struct magic *__f_magic = 0; /* array of magic entries */
#else
struct magic *__f_magic; /* array of magic entries */
#endif
69
/*
 * EATAB - advance the scan pointer past a run of ASCII whitespace.
 * The macro operates on a variable named `l' that must exist in the
 * scope where the macro is expanded.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
	isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE - map an upper-case character to lower case; any other
 * character is returned unchanged. The argument is evaluated more
 * than once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
	tolower((unsigned char) (l)) : (l))
74
75
/* Prototypes for the file-local helpers defined further down. */
static int getvalue __PR((struct magic *, char **));
static int hextoint __PR((int));
static char *apgetstr __PR((char *, char *, int, int *));
static int parse __PR((char *, int *, int));
static void eatsize __PR((char **));

/* Number of entries currently allocated for __f_magic[]; grown by parse(). */
static int maxmagic = 0;

static int apprentice_1 __PR((char *, int));
85
86 /*
87 * init_magic - read magic file and set up mapping
88 * based on the original apprentice()
89 */
90 int
init_magic(fn)91 init_magic(fn)
92 char *fn; /* list of magic files */
93 {
94 maxmagic = MAXMAGIS;
95 __f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
96 if (__f_magic == NULL)
97 return -1;
98
99 return(apprentice_1(fn, 0));
100 }
101
102 static int
apprentice_1(fn,check)103 apprentice_1(fn, check)
104 char *fn; /* name of magic file */
105 int check; /* non-zero? checking-only run. */
106 {
107 static const char hdr[] =
108 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
109 FILE *f;
110 char line[BUFSIZ+1];
111 int errs = 0;
112 int lineno;
113
114 f = fopen(fn, "r");
115 if (f==NULL) {
116 return -1;
117 }
118
119 /* parse it */
120 if (check) /* print silly verbose header for USG compat. */
121 (void) printf("%s\n", hdr);
122
123 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
124 if (line[0]=='#') /* comment, do not parse */
125 continue;
126 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
127 continue;
128 line[strlen(line)-1] = '\0'; /* delete newline */
129 if (parse(line, &__f_nmagic, check) != 0)
130 errs = 1;
131 }
132
133 (void) fclose(f);
134 return errs;
135 }
136
137 /*
138 * extend the sign bit if the comparison is to be signed
139 * XXX is uint32 really a good idea XXX JS
140 */
141 UInt32_t
signextend(m,v)142 signextend(m, v)
143 struct magic *m;
144 UInt32_t v;
145 {
146 if (!(m->flag & UNSIGNED))
147 switch(m->type) {
148 /*
149 * Do not remove the casts below. They are
150 * vital. When later compared with the data,
151 * the sign extension must have happened.
152 */
153 case BYTE:
154 v = (char) v;
155 break;
156 case SHORT:
157 case BESHORT:
158 case LESHORT:
159 v = (short) v;
160 break;
161 case DATE:
162 case BEDATE:
163 case LEDATE:
164 case LONG:
165 case BELONG:
166 case LELONG:
167 v = (Int32_t) v;
168 break;
169 case STRING:
170 break;
171 default:
172 return -1;
173 }
174 return v;
175 }
176
177 /*
178 * parse one line from magic file, put into magic[index++] if valid
179 */
180 static int
parse(l,ndx,check)181 parse(l, ndx, check)
182 char *l;
183 int *ndx, check;
184 {
185 int i = 0, nd = *ndx;
186 struct magic *m;
187 char *t, *s;
188 struct magic *__f_magic2;
189
190 #define ALLOC_INCR 20
191 if (nd+1 >= maxmagic){
192 maxmagic += ALLOC_INCR;
193 if ((__f_magic2 = (struct magic *) realloc(__f_magic,
194 sizeof(struct magic) *
195 maxmagic)) == NULL) {
196 maxmagic -= ALLOC_INCR;
197 #ifdef MAIN
198 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
199 #else
200 (void) fprintf(stderr, "libfile: Out of memory.\n");
201 #endif
202 if (check)
203 return -1;
204 else
205 exit(1);
206 }
207 __f_magic = __f_magic2;
208 memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
209 }
210 m = &__f_magic[*ndx];
211 m->flag = 0;
212 m->cont_level = 0;
213
214 while (*l == '>') {
215 ++l; /* step over */
216 m->cont_level++;
217 }
218
219 if (m->cont_level != 0 && *l == '(') {
220 ++l; /* step over */
221 m->flag |= INDIR;
222 }
223 if (m->cont_level != 0 && *l == '&') {
224 ++l; /* step over */
225 m->flag |= ADD;
226 }
227
228 /* get offset, then skip over it */
229 m->offset = (int) strtoul(l,&t,0);
230 /*
231 if (l == t)
232 magwarn("offset %s invalid", l);
233 */
234 l = t;
235
236 if (m->flag & INDIR) {
237 m->in.type = LONG;
238 m->in.offset = 0;
239 /*
240 * read [.lbs][+-]nnnnn)
241 */
242 if (*l == '.') {
243 l++;
244 switch (LOWCASE(*l)) {
245 case 'l':
246 m->in.type = LONG;
247 break;
248 case 'h':
249 case 's':
250 m->in.type = SHORT;
251 break;
252 case 'c':
253 case 'b':
254 m->in.type = BYTE;
255 break;
256 default:
257 break;
258 }
259 l++;
260 }
261 s = l;
262 if (*l == '+' || *l == '-') l++;
263 if (isdigit((unsigned char)*l)) {
264 m->in.offset = strtoul(l, &t, 0);
265 if (*s == '-') m->in.offset = - m->in.offset;
266 }
267 else
268 t = l;
269 /*
270 if (*t++ != ')')
271 magwarn("missing ')' in indirect offset");
272 */
273 l = t;
274 }
275
276
277 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
278 ++l;
279 EATAB;
280
281 #define NBYTE 4
282 #define NSHORT 5
283 #define NLONG 4
284 #define NSTRING 6
285 #define NDATE 4
286 #define NBESHORT 7
287 #define NBELONG 6
288 #define NBEDATE 6
289 #define NLESHORT 7
290 #define NLELONG 6
291 #define NLEDATE 6
292
293 if (*l == 'u') {
294 ++l;
295 m->flag |= UNSIGNED;
296 }
297
298 /* get type, skip it */
299 if (strncmp(l, "byte", NBYTE)==0) {
300 m->type = BYTE;
301 l += NBYTE;
302 } else if (strncmp(l, "short", NSHORT)==0) {
303 m->type = SHORT;
304 l += NSHORT;
305 } else if (strncmp(l, "long", NLONG)==0) {
306 m->type = LONG;
307 l += NLONG;
308 } else if (strncmp(l, "string", NSTRING)==0) {
309 m->type = STRING;
310 l += NSTRING;
311 } else if (strncmp(l, "date", NDATE)==0) {
312 m->type = DATE;
313 l += NDATE;
314 } else if (strncmp(l, "beshort", NBESHORT)==0) {
315 m->type = BESHORT;
316 l += NBESHORT;
317 } else if (strncmp(l, "belong", NBELONG)==0) {
318 m->type = BELONG;
319 l += NBELONG;
320 } else if (strncmp(l, "bedate", NBEDATE)==0) {
321 m->type = BEDATE;
322 l += NBEDATE;
323 } else if (strncmp(l, "leshort", NLESHORT)==0) {
324 m->type = LESHORT;
325 l += NLESHORT;
326 } else if (strncmp(l, "lelong", NLELONG)==0) {
327 m->type = LELONG;
328 l += NLELONG;
329 } else if (strncmp(l, "ledate", NLEDATE)==0) {
330 m->type = LEDATE;
331 l += NLEDATE;
332 } else {
333 return -1;
334 }
335 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
336 if (*l == '&') {
337 ++l;
338 m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */
339 eatsize(&l);
340 } else
341 m->mask = ~0L;
342 EATAB;
343
344 switch (*l) {
345 case '>':
346 case '<':
347 /* Old-style anding: "0 byte &0x80 dynamically linked" */
348 case '&':
349 case '^':
350 case '=':
351 m->reln = *l;
352 ++l;
353 break;
354 case '!':
355 if (m->type != STRING) {
356 m->reln = *l;
357 ++l;
358 break;
359 }
360 /* FALL THROUGH */
361 default:
362 if (*l == 'x' && isascii((unsigned char)l[1]) &&
363 isspace((unsigned char)l[1])) {
364 m->reln = *l;
365 ++l;
366 goto GetDesc; /* Bill The Cat */
367 }
368 m->reln = '=';
369 break;
370 }
371 EATAB;
372
373 if (getvalue(m, &l))
374 return -1;
375 /*
376 * TODO finish this macro and start using it!
377 * #define offsetcheck {if (offset > HOWMANY-1)
378 * magwarn("offset too big"); }
379 */
380
381 /*
382 * now get last part - the description
383 */
384 GetDesc:
385 EATAB;
386 if (l[0] == '\b') {
387 ++l;
388 m->nospflag = 1;
389 } else if ((l[0] == '\\') && (l[1] == 'b')) {
390 ++l;
391 ++l;
392 m->nospflag = 1;
393 } else
394 m->nospflag = 0;
395 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
396 /* NULLBODY */;
397
398 ++(*ndx); /* make room for next */
399 return 0;
400 }
401
402 /*
403 * Read a numeric value from a pointer, into the value union of a magic
404 * pointer, according to the magic type. Update the string pointer to point
405 * just after the number read. Return 0 for success, non-zero for failure.
406 */
407 static int
getvalue(m,p)408 getvalue(m, p)
409 struct magic *m;
410 char **p;
411 {
412 int slen;
413
414 if (m->type == STRING) {
415 *p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen);
416 m->vallen = slen;
417 } else
418 if (m->reln != 'x') {
419 m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */
420 eatsize(p);
421 }
422 return 0;
423 }
424
/*
 * hextoint - single hex char to int; -1 if not a hex char.
 *
 * Plain range comparisons are used so that the result depends neither
 * on the locale nor on the non-ISO isascii().
 */
static int
hextoint(c)
	int	c;
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (c + 10 - 'a');
	if (c >= 'A' && c <= 'F')
		return (c + 10 - 'A');
	return (-1);
}

/*
 * apgetstr - convert a string containing C character escapes
 *
 * Characters are copied from "s" to "p" until an unescaped white space
 * character, the end of "s", or the end of the output buffer is reached.
 * Recognized escapes are \n \r \b \t \f \v, a backslash followed by up
 * to 3 octal digits and \x followed by up to 2 hex digits; any other
 * escaped character stands for itself.
 *
 * "plen" is the size of the buffer at "p"; the result is always NUL
 * terminated and its length (without the terminator) is stored in *slen.
 * If the buffer is too small a message is written to stderr and the
 * result is truncated.
 *
 * Returns the updated scan pointer: just behind the white space
 * character (or behind the character that no longer fitted), or AT the
 * terminating NUL when the input ran out. It never points behind the
 * terminator, so the caller may safely continue scanning from there.
 */
static char *
apgetstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int		plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	/* The terminator is only peeked at, never consumed. */
	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		c = *s;
		if (c == '\0')		/* dangling backslash: stop at NUL */
			break;
		s++;
		switch (c) {

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char) val;
			break;
		}
	}
	*p = '\0';
	*slen = (int) (p - origp);
	return (s);
}
544
545
/*
 * showstr - print a string using C character escapes
 *
 * Characters in the range 040..0176 are written to "fp" unchanged. All
 * other characters are written as a backslash followed by n, r, b, t,
 * f or v where applicable, or by three octal digits otherwise.
 *
 * If "len" is -1 the string is NUL terminated; otherwise exactly "len"
 * characters are printed, NUL characters included.
 */
void
showstr(fp, s, len)
	FILE		*fp;
	const char	*s;
	int		len;
{
	register char	c;
	int		counted = (len != -1);

	for (;;) {
		/*
		 * Check the remaining count before fetching, so that no
		 * byte beyond the "len" requested ones is ever read.
		 */
		if (counted && len-- == 0)
			break;
		c = *s++;
		if (!counted && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* %o expects an unsigned argument */
			(void) fprintf(fp, "%.3o", (unsigned) (c & 0377));
			break;
		}
	}
}
604
/*
 * eatsize - skip the size specification that may follow a number
 * (e.g. the "UL" in "10UL").
 *
 * An optional 'u' followed by at most one of 'l', 's', 'h', 'b' or 'c'
 * is skipped, in either case. *p is updated to point behind the
 * skipped characters.
 */
static void
eatsize(p)
	char	**p;
{
	char	*cp = *p;

	if (LOWCASE(*cp) == 'u')
		cp++;

	switch (LOWCASE(*cp)) {
	case 'l':	/* long */
	case 's':	/* short */
	case 'h':	/* short */
	case 'b':	/* char/byte */
	case 'c':	/* char/byte */
		cp++;
		break;
	default:
		break;
	}

	*p = cp;
}
631