1 /* 2 ** find file types by using a modified "magic" file 3 ** 4 ** based on file v3.22 by Ian F. Darwin (see below) 5 ** 6 ** Modified for mkhybrid James Pearson 19/5/98 7 */ 8 9 /* 10 * apprentice - make one pass through /etc/magic, learning its secrets. 11 * 12 * Copyright (c) Ian F. Darwin, 1987. 13 * Written by Ian F. Darwin. 14 * 15 * This software is not subject to any license of the American Telephone 16 * and Telegraph Company or of the Regents of the University of California. 17 * 18 * Permission is granted to anyone to use this software for any purpose on 19 * any computer system, and to alter it and redistribute it freely, subject 20 * to the following restrictions: 21 * 22 * 1. The author is not responsible for the consequences of use of this 23 * software, no matter how awful, even if they arise from flaws in it. 24 * 25 * 2. The origin of this software must not be misrepresented, either by 26 * explicit claim or by omission. Since few users ever read sources, 27 * credits must appear in the documentation. 28 * 29 * 3. Altered versions must be plainly marked as such, and must not be 30 * misrepresented as being the original software. Since few users 31 * ever read sources, credits must appear in the documentation. 32 * 33 * 4. This notice may not be removed or altered. 34 */ 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <ctype.h> 40 #include <errno.h> 41 #include "file.h" 42 43 #ifndef lint 44 static char *moduleid = 45 "@(#)$Id: apprentice.c,v 1.1 2000/10/10 20:40:36 beck Exp $"; 46 #endif /* lint */ 47 48 #define EATAB {while (isascii((unsigned char) *l) && \ 49 isspace((unsigned char) *l)) ++l;} 50 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 51 tolower((unsigned char) (l)) : (l)) 52 53 54 static int getvalue __P((struct magic *, char **)); 55 static int hextoint __P((int)); 56 static char *getstr __P((char *, char *, int, int *)); 57 static int parse __P((char *, int *, int)); 58 static void eatsize __P((char **)); 59 60 static int maxmagic = 0; 61 62 static int apprentice_1 __P((char *, int)); 63 64 /* 65 * init_magic - read magic file and set up mapping 66 * based on the original apprentice() 67 */ 68 int 69 init_magic(fn) 70 char *fn; /* list of magic files */ 71 { 72 maxmagic = MAXMAGIS; 73 magic = (struct magic *) calloc(sizeof(struct magic), maxmagic); 74 if (magic == NULL) 75 return -1; 76 77 return(apprentice_1(fn, 0)); 78 } 79 80 static int 81 apprentice_1(fn, check) 82 char *fn; /* name of magic file */ 83 int check; /* non-zero? checking-only run. */ 84 { 85 static const char hdr[] = 86 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 87 FILE *f; 88 char line[BUFSIZ+1]; 89 int errs = 0; 90 91 f = fopen(fn, "r"); 92 if (f==NULL) { 93 return -1; 94 } 95 96 /* parse it */ 97 if (check) /* print silly verbose header for USG compat. */ 98 (void) printf("%s\n", hdr); 99 100 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) { 101 if (line[0]=='#') /* comment, do not parse */ 102 continue; 103 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */ 104 continue; 105 line[strlen(line)-1] = '\0'; /* delete newline */ 106 if (parse(line, &nmagic, check) != 0) 107 errs = 1; 108 } 109 110 (void) fclose(f); 111 return errs; 112 } 113 114 /* 115 * extend the sign bit if the comparison is to be signed 116 */ 117 uint32 118 signextend(m, v) 119 struct magic *m; 120 uint32 v; 121 { 122 if (!(m->flag & UNSIGNED)) 123 switch(m->type) { 124 /* 125 * Do not remove the casts below. They are 126 * vital. When later compared with the data, 127 * the sign extension must have happened. 128 */ 129 case BYTE: 130 v = (char) v; 131 break; 132 case SHORT: 133 case BESHORT: 134 case LESHORT: 135 v = (short) v; 136 break; 137 case DATE: 138 case BEDATE: 139 case LEDATE: 140 case LONG: 141 case BELONG: 142 case LELONG: 143 v = (int32) v; 144 break; 145 case STRING: 146 break; 147 default: 148 return -1; 149 } 150 return v; 151 } 152 153 /* 154 * parse one line from magic file, put into magic[index++] if valid 155 */ 156 static int 157 parse(l, ndx, check) 158 char *l; 159 int *ndx, check; 160 { 161 int i = 0, nd = *ndx; 162 struct magic *m; 163 char *t, *s; 164 165 #define ALLOC_INCR 20 166 if (nd+1 >= maxmagic){ 167 maxmagic += ALLOC_INCR; 168 if ((magic = (struct magic *) realloc(magic, 169 sizeof(struct magic) * 170 maxmagic)) == NULL) { 171 (void) fprintf(stderr, "%s: Out of memory.\n", progname); 172 if (check) 173 return -1; 174 else 175 exit(1); 176 } 177 memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR); 178 } 179 m = &magic[*ndx]; 180 m->flag = 0; 181 m->cont_level = 0; 182 183 while (*l == '>') { 184 ++l; /* step over */ 185 m->cont_level++; 186 } 187 188 if (m->cont_level != 0 && *l == '(') { 189 ++l; /* step over */ 190 m->flag |= INDIR; 191 } 192 if (m->cont_level != 0 && *l == '&') { 193 ++l; /* step over */ 194 m->flag |= ADD; 195 } 196 197 /* get offset, then skip over it */ 198 m->offset = (int) strtoul(l,&t,0); 199 /* 200 if (l == t) 201 magwarn("offset %s invalid", l); 202 */ 203 l = t; 204 205 if (m->flag & INDIR) { 206 m->in.type = LONG; 207 m->in.offset = 0; 208 /* 209 * read [.lbs][+-]nnnnn) 210 */ 211 if (*l == '.') { 212 l++; 213 switch (LOWCASE(*l)) { 214 case 'l': 215 m->in.type = LONG; 216 break; 217 case 'h': 218 case 's': 219 m->in.type = SHORT; 220 break; 221 case 'c': 222 case 'b': 223 m->in.type = BYTE; 224 break; 225 default: 226 break; 227 } 228 l++; 229 } 230 s = l; 231 if (*l == '+' || *l == '-') l++; 232 if (isdigit((unsigned char)*l)) { 233 m->in.offset = strtoul(l, &t, 0); 234 if (*s == '-') m->in.offset = - m->in.offset; 235 } 236 else 237 t = l; 238 /* 239 if (*t++ != ')') 240 magwarn("missing ')' in indirect offset"); 241 */ 242 l = t; 243 } 244 245 246 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) 247 ++l; 248 EATAB; 249 250 #define NBYTE 4 251 #define NSHORT 5 252 #define NLONG 4 253 #define NSTRING 6 254 #define NDATE 4 255 #define NBESHORT 7 256 #define NBELONG 6 257 #define NBEDATE 6 258 #define NLESHORT 7 259 #define NLELONG 6 260 #define NLEDATE 6 261 262 if (*l == 'u') { 263 ++l; 264 m->flag |= UNSIGNED; 265 } 266 267 /* get type, skip it */ 268 if (strncmp(l, "byte", NBYTE)==0) { 269 m->type = BYTE; 270 l += NBYTE; 271 } else if (strncmp(l, "short", NSHORT)==0) { 272 m->type = SHORT; 273 l += NSHORT; 274 } else if (strncmp(l, "long", NLONG)==0) { 275 m->type = LONG; 276 l += NLONG; 277 } else if (strncmp(l, "string", NSTRING)==0) { 278 m->type = STRING; 279 l += NSTRING; 280 } else if (strncmp(l, "date", NDATE)==0) { 281 m->type = DATE; 282 l += NDATE; 283 } else if (strncmp(l, "beshort", NBESHORT)==0) { 284 m->type = BESHORT; 285 l += NBESHORT; 286 } else if (strncmp(l, "belong", NBELONG)==0) { 287 m->type = BELONG; 288 l += NBELONG; 289 } else if (strncmp(l, "bedate", NBEDATE)==0) { 290 m->type = BEDATE; 291 l += NBEDATE; 292 } else if (strncmp(l, "leshort", NLESHORT)==0) { 293 m->type = LESHORT; 294 l += NLESHORT; 295 } else if (strncmp(l, "lelong", NLELONG)==0) { 296 m->type = LELONG; 297 l += NLELONG; 298 } else if (strncmp(l, "ledate", NLEDATE)==0) { 299 m->type = LEDATE; 300 l += NLEDATE; 301 } else { 302 return -1; 303 } 304 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 305 if (*l == '&') { 306 ++l; 307 m->mask = signextend(m, strtoul(l, &l, 0)); 308 eatsize(&l); 309 } else 310 m->mask = ~0L; 311 EATAB; 312 313 switch (*l) { 314 case '>': 315 case '<': 316 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 317 case '&': 318 case '^': 319 case '=': 320 m->reln = *l; 321 ++l; 322 break; 323 case '!': 324 if (m->type != STRING) { 325 m->reln = *l; 326 ++l; 327 break; 328 } 329 /* FALL THROUGH */ 330 default: 331 if (*l == 'x' && isascii((unsigned char)l[1]) && 332 isspace((unsigned char)l[1])) { 333 m->reln = *l; 334 ++l; 335 goto GetDesc; /* Bill The Cat */ 336 } 337 m->reln = '='; 338 break; 339 } 340 EATAB; 341 342 if (getvalue(m, &l)) 343 return -1; 344 /* 345 * TODO finish this macro and start using it! 346 * #define offsetcheck {if (offset > HOWMANY-1) 347 * magwarn("offset too big"); } 348 */ 349 350 /* 351 * now get last part - the description 352 */ 353 GetDesc: 354 EATAB; 355 if (l[0] == '\b') { 356 ++l; 357 m->nospflag = 1; 358 } else if ((l[0] == '\\') && (l[1] == 'b')) { 359 ++l; 360 ++l; 361 m->nospflag = 1; 362 } else 363 m->nospflag = 0; 364 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC) 365 /* NULLBODY */; 366 367 ++(*ndx); /* make room for next */ 368 return 0; 369 } 370 371 /* 372 * Read a numeric value from a pointer, into the value union of a magic 373 * pointer, according to the magic type. Update the string pointer to point 374 * just after the number read. Return 0 for success, non-zero for failure. 375 */ 376 static int 377 getvalue(m, p) 378 struct magic *m; 379 char **p; 380 { 381 int slen; 382 383 if (m->type == STRING) { 384 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); 385 m->vallen = slen; 386 } else 387 if (m->reln != 'x') { 388 m->value.l = signextend(m, strtoul(*p, p, 0)); 389 eatsize(p); 390 } 391 return 0; 392 } 393 394 /* 395 * Convert a string containing C character escapes. Stop at an unescaped 396 * space or tab. 397 * Copy the converted version to "p", returning its length in *slen. 398 * Return updated scan pointer as function result. 399 */ 400 static char * 401 getstr(s, p, plen, slen) 402 register char *s; 403 register char *p; 404 int plen, *slen; 405 { 406 char *origs = s, *origp = p; 407 char *pmax = p + plen - 1; 408 register int c; 409 register int val; 410 411 while ((c = *s++) != '\0') { 412 if (isspace((unsigned char) c)) 413 break; 414 if (p >= pmax) { 415 fprintf(stderr, "String too long: %s\n", origs); 416 break; 417 } 418 if(c == '\\') { 419 switch(c = *s++) { 420 421 case '\0': 422 goto out; 423 424 default: 425 *p++ = (char) c; 426 break; 427 428 case 'n': 429 *p++ = '\n'; 430 break; 431 432 case 'r': 433 *p++ = '\r'; 434 break; 435 436 case 'b': 437 *p++ = '\b'; 438 break; 439 440 case 't': 441 *p++ = '\t'; 442 break; 443 444 case 'f': 445 *p++ = '\f'; 446 break; 447 448 case 'v': 449 *p++ = '\v'; 450 break; 451 452 /* \ and up to 3 octal digits */ 453 case '0': 454 case '1': 455 case '2': 456 case '3': 457 case '4': 458 case '5': 459 case '6': 460 case '7': 461 val = c - '0'; 462 c = *s++; /* try for 2 */ 463 if(c >= '0' && c <= '7') { 464 val = (val<<3) | (c - '0'); 465 c = *s++; /* try for 3 */ 466 if(c >= '0' && c <= '7') 467 val = (val<<3) | (c-'0'); 468 else 469 --s; 470 } 471 else 472 --s; 473 *p++ = (char)val; 474 break; 475 476 /* \x and up to 2 hex digits */ 477 case 'x': 478 val = 'x'; /* Default if no digits */ 479 c = hextoint(*s++); /* Get next char */ 480 if (c >= 0) { 481 val = c; 482 c = hextoint(*s++); 483 if (c >= 0) 484 val = (val << 4) + c; 485 else 486 --s; 487 } else 488 --s; 489 *p++ = (char)val; 490 break; 491 } 492 } else 493 *p++ = (char)c; 494 } 495 out: 496 *p = '\0'; 497 *slen = p - origp; 498 return s; 499 } 500 501 502 /* Single hex char to int; -1 if not a hex char. */ 503 static int 504 hextoint(c) 505 int c; 506 { 507 if (!isascii((unsigned char) c)) return -1; 508 if (isdigit((unsigned char) c)) return c - '0'; 509 if ((c>='a')&&(c<='f')) return c + 10 - 'a'; 510 if ((c>='A')&&(c<='F')) return c + 10 - 'A'; 511 return -1; 512 } 513 514 515 /* 516 * Print a string containing C character escapes. 517 */ 518 void 519 showstr(fp, s, len) 520 FILE *fp; 521 const char *s; 522 int len; 523 { 524 register char c; 525 526 for (;;) { 527 c = *s++; 528 if (len == -1) { 529 if (c == '\0') 530 break; 531 } 532 else { 533 if (len-- == 0) 534 break; 535 } 536 if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 537 (void) fputc(c, fp); 538 else { 539 (void) fputc('\\', fp); 540 switch (c) { 541 542 case '\n': 543 (void) fputc('n', fp); 544 break; 545 546 case '\r': 547 (void) fputc('r', fp); 548 break; 549 550 case '\b': 551 (void) fputc('b', fp); 552 break; 553 554 case '\t': 555 (void) fputc('t', fp); 556 break; 557 558 case '\f': 559 (void) fputc('f', fp); 560 break; 561 562 case '\v': 563 (void) fputc('v', fp); 564 break; 565 566 default: 567 (void) fprintf(fp, "%.3o", c & 0377); 568 break; 569 } 570 } 571 } 572 } 573 574 /* 575 * eatsize(): Eat the size spec from a number [eg. 10UL] 576 */ 577 static void 578 eatsize(p) 579 char **p; 580 { 581 char *l = *p; 582 583 if (LOWCASE(*l) == 'u') 584 l++; 585 586 switch (LOWCASE(*l)) { 587 case 'l': /* long */ 588 case 's': /* short */ 589 case 'h': /* short */ 590 case 'b': /* char/byte */ 591 case 'c': /* char/byte */ 592 l++; 593 /*FALLTHROUGH*/ 594 default: 595 break; 596 } 597 598 *p = l; 599 } 600