1 /*
2 ** find file types by using a modified "magic" file
3 **
4 ** based on file v3.22 by Ian F. Darwin (see below)
5 **
6 ** Modified for mkhybrid James Pearson 19/5/98
7 */
8
9 /*
10 * apprentice - make one pass through /etc/magic, learning its secrets.
11 *
12 * Copyright (c) Ian F. Darwin, 1987.
13 * Written by Ian F. Darwin.
14 *
15 * This software is not subject to any license of the American Telephone
16 * and Telegraph Company or of the Regents of the University of California.
17 *
18 * Permission is granted to anyone to use this software for any purpose on
19 * any computer system, and to alter it and redistribute it freely, subject
20 * to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of this
23 * software, no matter how awful, even if they arise from flaws in it.
24 *
25 * 2. The origin of this software must not be misrepresented, either by
26 * explicit claim or by omission. Since few users ever read sources,
27 * credits must appear in the documentation.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not be
30 * misrepresented as being the original software. Since few users
31 * ever read sources, credits must appear in the documentation.
32 *
33 * 4. This notice may not be removed or altered.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include "file.h"
42
/*
 * EATAB - advance the scan pointer `l' (which must be in scope where the
 * macro is used) past any run of ASCII whitespace.  The body is wrapped in
 * do { } while (0) so that "EATAB;" is exactly one statement and remains
 * safe inside an unbraced if/else.
 */
#define	EATAB	do { \
		while (isascii((unsigned char) *l) && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE - yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define	LOWCASE(l)	(isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
47
48
49 static int getvalue __P((struct magic *, char **));
50 static int hextoint __P((int));
51 static char *getstr __P((char *, char *, int, int *));
52 static int parse __P((char *, int *, int));
53 static void eatsize __P((char **));
54
55 static int maxmagic = 0;
56
57 static int apprentice_1 __P((char *, int));
58
59 /*
60 * init_magic - read magic file and set up mapping
61 * based on the original apprentice()
62 */
63 int
init_magic(fn)64 init_magic(fn)
65 char *fn; /* list of magic files */
66 {
67 maxmagic = MAXMAGIS;
68 magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
69 if (magic == NULL)
70 return -1;
71
72 return(apprentice_1(fn, 0));
73 }
74
75 static int
apprentice_1(fn,check)76 apprentice_1(fn, check)
77 char *fn; /* name of magic file */
78 int check; /* non-zero? checking-only run. */
79 {
80 static const char hdr[] =
81 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
82 FILE *f;
83 char line[BUFSIZ+1];
84 int errs = 0;
85
86 f = fopen(fn, "r");
87 if (f==NULL) {
88 return -1;
89 }
90
91 /* parse it */
92 if (check) /* print silly verbose header for USG compat. */
93 (void) printf("%s\n", hdr);
94
95 for (lineno = 1;fgets(line, sizeof(line), f) != NULL; lineno++) {
96 if (line[0]=='#') /* comment, do not parse */
97 continue;
98 /* delete newline */
99 line[strcspn(line, "\n")] = '\0';
100 if (line[0] == '\0')
101 continue;
102 if (parse(line, &nmagic, check) != 0)
103 errs = 1;
104 }
105
106 (void) fclose(f);
107 return errs;
108 }
109
110 /*
111 * extend the sign bit if the comparison is to be signed
112 */
113 uint32
signextend(m,v)114 signextend(m, v)
115 struct magic *m;
116 uint32 v;
117 {
118 if (!(m->flag & UNSIGNED))
119 switch(m->type) {
120 /*
121 * Do not remove the casts below. They are
122 * vital. When later compared with the data,
123 * the sign extension must have happened.
124 */
125 case BYTE:
126 v = (char) v;
127 break;
128 case SHORT:
129 case BESHORT:
130 case LESHORT:
131 v = (short) v;
132 break;
133 case DATE:
134 case BEDATE:
135 case LEDATE:
136 case LONG:
137 case BELONG:
138 case LELONG:
139 v = (int32) v;
140 break;
141 case STRING:
142 break;
143 default:
144 return -1;
145 }
146 return v;
147 }
148
149 /*
150 * parse one line from magic file, put into magic[index++] if valid
151 */
152 static int
parse(l,ndx,check)153 parse(l, ndx, check)
154 char *l;
155 int *ndx, check;
156 {
157 int i = 0, nd = *ndx;
158 struct magic *m;
159 char *t, *s;
160
161 #define ALLOC_INCR 20
162 if (nd+1 >= maxmagic){
163 maxmagic += ALLOC_INCR;
164 if ((magic = (struct magic *) realloc(magic,
165 sizeof(struct magic) *
166 maxmagic)) == NULL) {
167 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
168 if (check)
169 return -1;
170 else
171 exit(1);
172 }
173 memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
174 }
175 m = &magic[*ndx];
176 m->flag = 0;
177 m->cont_level = 0;
178
179 while (*l == '>') {
180 ++l; /* step over */
181 m->cont_level++;
182 }
183
184 if (m->cont_level != 0 && *l == '(') {
185 ++l; /* step over */
186 m->flag |= INDIR;
187 }
188 if (m->cont_level != 0 && *l == '&') {
189 ++l; /* step over */
190 m->flag |= ADD;
191 }
192
193 /* get offset, then skip over it */
194 m->offset = (int) strtoul(l,&t,0);
195 /*
196 if (l == t)
197 magwarn("offset %s invalid", l);
198 */
199 l = t;
200
201 if (m->flag & INDIR) {
202 m->in.type = LONG;
203 m->in.offset = 0;
204 /*
205 * read [.lbs][+-]nnnnn)
206 */
207 if (*l == '.') {
208 l++;
209 switch (LOWCASE(*l)) {
210 case 'l':
211 m->in.type = LONG;
212 break;
213 case 'h':
214 case 's':
215 m->in.type = SHORT;
216 break;
217 case 'c':
218 case 'b':
219 m->in.type = BYTE;
220 break;
221 default:
222 break;
223 }
224 l++;
225 }
226 s = l;
227 if (*l == '+' || *l == '-') l++;
228 if (isdigit((unsigned char)*l)) {
229 m->in.offset = strtoul(l, &t, 0);
230 if (*s == '-') m->in.offset = - m->in.offset;
231 }
232 else
233 t = l;
234 /*
235 if (*t++ != ')')
236 magwarn("missing ')' in indirect offset");
237 */
238 l = t;
239 }
240
241
242 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
243 ++l;
244 EATAB;
245
246 #define NBYTE 4
247 #define NSHORT 5
248 #define NLONG 4
249 #define NSTRING 6
250 #define NDATE 4
251 #define NBESHORT 7
252 #define NBELONG 6
253 #define NBEDATE 6
254 #define NLESHORT 7
255 #define NLELONG 6
256 #define NLEDATE 6
257
258 if (*l == 'u') {
259 ++l;
260 m->flag |= UNSIGNED;
261 }
262
263 /* get type, skip it */
264 if (strncmp(l, "byte", NBYTE)==0) {
265 m->type = BYTE;
266 l += NBYTE;
267 } else if (strncmp(l, "short", NSHORT)==0) {
268 m->type = SHORT;
269 l += NSHORT;
270 } else if (strncmp(l, "long", NLONG)==0) {
271 m->type = LONG;
272 l += NLONG;
273 } else if (strncmp(l, "string", NSTRING)==0) {
274 m->type = STRING;
275 l += NSTRING;
276 } else if (strncmp(l, "date", NDATE)==0) {
277 m->type = DATE;
278 l += NDATE;
279 } else if (strncmp(l, "beshort", NBESHORT)==0) {
280 m->type = BESHORT;
281 l += NBESHORT;
282 } else if (strncmp(l, "belong", NBELONG)==0) {
283 m->type = BELONG;
284 l += NBELONG;
285 } else if (strncmp(l, "bedate", NBEDATE)==0) {
286 m->type = BEDATE;
287 l += NBEDATE;
288 } else if (strncmp(l, "leshort", NLESHORT)==0) {
289 m->type = LESHORT;
290 l += NLESHORT;
291 } else if (strncmp(l, "lelong", NLELONG)==0) {
292 m->type = LELONG;
293 l += NLELONG;
294 } else if (strncmp(l, "ledate", NLEDATE)==0) {
295 m->type = LEDATE;
296 l += NLEDATE;
297 } else {
298 return -1;
299 }
300 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
301 if (*l == '&') {
302 ++l;
303 m->mask = signextend(m, strtoul(l, &l, 0));
304 eatsize(&l);
305 } else
306 m->mask = ~0L;
307 EATAB;
308
309 switch (*l) {
310 case '>':
311 case '<':
312 /* Old-style anding: "0 byte &0x80 dynamically linked" */
313 case '&':
314 case '^':
315 case '=':
316 m->reln = *l;
317 ++l;
318 break;
319 case '!':
320 if (m->type != STRING) {
321 m->reln = *l;
322 ++l;
323 break;
324 }
325 /* FALL THROUGH */
326 default:
327 if (*l == 'x' && isascii((unsigned char)l[1]) &&
328 isspace((unsigned char)l[1])) {
329 m->reln = *l;
330 ++l;
331 goto GetDesc; /* Bill The Cat */
332 }
333 m->reln = '=';
334 break;
335 }
336 EATAB;
337
338 if (getvalue(m, &l))
339 return -1;
340 /*
341 * TODO finish this macro and start using it!
342 * #define offsetcheck {if (offset > HOWMANY-1)
343 * magwarn("offset too big"); }
344 */
345
346 /*
347 * now get last part - the description
348 */
349 GetDesc:
350 EATAB;
351 if (l[0] == '\b') {
352 ++l;
353 m->nospflag = 1;
354 } else if ((l[0] == '\\') && (l[1] == 'b')) {
355 ++l;
356 ++l;
357 m->nospflag = 1;
358 } else
359 m->nospflag = 0;
360 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
361 /* NULLBODY */;
362
363 ++(*ndx); /* make room for next */
364 return 0;
365 }
366
367 /*
368 * Read a numeric value from a pointer, into the value union of a magic
369 * pointer, according to the magic type. Update the string pointer to point
370 * just after the number read. Return 0 for success, non-zero for failure.
371 */
372 static int
getvalue(m,p)373 getvalue(m, p)
374 struct magic *m;
375 char **p;
376 {
377 int slen;
378
379 if (m->type == STRING) {
380 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
381 m->vallen = slen;
382 } else
383 if (m->reln != 'x') {
384 m->value.l = signextend(m, strtoul(*p, p, 0));
385 eatsize(p);
386 }
387 return 0;
388 }
389
/*
 * hextoint - single hex char to int; -1 if not a hex char.
 *
 * Uses plain range comparisons (no <ctype.h>), so any argument outside
 * '0'-'9', 'a'-'f', 'A'-'F' -- including negative char values -- gives -1.
 */
static int
hextoint(c)
	int	c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 * Scanning stops at an unescaped whitespace character, at the end of the
 * input, or when the output buffer is full (a "String too long" message
 * is then written to stderr).  Recognised escapes are \n \r \b \t \f \v,
 * \ followed by up to 3 octal digits and \x followed by up to 2 hex
 * digits; any other escaped character stands for itself.
 *
 * s	- input text
 * p	- output buffer, receives the converted, NUL-terminated string
 * plen	- size of the output buffer in bytes
 * slen	- receives the number of bytes stored, not counting the NUL
 *
 * Returns the updated scan pointer: just past the whitespace character
 * that ended the string, or pointing AT the terminating NUL when the
 * input ran out, so the caller never looks beyond the end of its line.
 */
static char *
getstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int		plen, *slen;
{
	char		*origs = s, *origp = p;
	char		*pmax = p + plen - 1;	/* keep room for the NUL */
	register int	c;
	register int	val;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c == '\\') {
			switch (c = *s++) {

			case '\0':
				/* lone backslash at end of input */
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;	/* try for 2 */
				if (c >= '0' && c <= '7') {
					val = (val<<3) | (c - '0');
					c = *s++;	/* try for 3 */
					if (c >= '0' && c <= '7')
						val = (val<<3) | (c-'0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	/*
	 * If the scan ended on the terminating NUL, s has already been
	 * stepped past it; back up so the caller is handed a pointer to
	 * the NUL rather than to whatever follows the string in memory.
	 */
	if (c == '\0')
		--s;
	*p = '\0';
	*slen = p - origp;
	return s;
}
509
510
/*
 * showstr - print a string, rendering non-printable bytes as C escapes.
 *
 * fp	- stream to write to
 * s	- bytes to print
 * len	- number of bytes to print, or -1 to print up to (not including)
 *	  the first NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are written
 * as \n \r \b \t \f \v; every other byte as a backslash followed by three
 * octal digits.
 */
void
showstr(fp, s, len)
	FILE		*fp;
	const char	*s;
	int		len;
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else {
			/*
			 * Test the count before fetching, so that s[len]
			 * -- which may lie outside the caller's buffer --
			 * is never read.
			 */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				/* mask to 8 bits so a negative char prints as 3 digits */
				(void) fprintf(fp, "%.3o", (unsigned int) (c & 0377));
				break;
			}
		}
	}
}
569
/*
 * eatsize - skip the size specification that may follow a number
 * [eg. 10UL].
 *
 * An optional 'u' and then at most one of 'l', 's', 'h', 'b' or 'c' are
 * consumed, each in either case.  *p is advanced past what was eaten.
 */
static void
eatsize(p)
	char	**p;
{
	char	*cur = *p;
	int	suffix;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional width: long, short (s/h) or char/byte (b/c) */
	suffix = LOWCASE(*cur);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		cur++;

	*p = cur;
}
596