1 /* 2 ** find file types by using a modified "magic" file 3 ** 4 ** based on file v3.22 by Ian F. Darwin (see below) 5 ** 6 ** Modified for mkhybrid James Pearson 19/5/98 7 */ 8 9 /* 10 * apprentice - make one pass through /etc/magic, learning its secrets. 11 * 12 * Copyright (c) Ian F. Darwin, 1987. 13 * Written by Ian F. Darwin. 14 * 15 * This software is not subject to any license of the American Telephone 16 * and Telegraph Company or of the Regents of the University of California. 17 * 18 * Permission is granted to anyone to use this software for any purpose on 19 * any computer system, and to alter it and redistribute it freely, subject 20 * to the following restrictions: 21 * 22 * 1. The author is not responsible for the consequences of use of this 23 * software, no matter how awful, even if they arise from flaws in it. 24 * 25 * 2. The origin of this software must not be misrepresented, either by 26 * explicit claim or by omission. Since few users ever read sources, 27 * credits must appear in the documentation. 28 * 29 * 3. Altered versions must be plainly marked as such, and must not be 30 * misrepresented as being the original software. Since few users 31 * ever read sources, credits must appear in the documentation. 32 * 33 * 4. This notice may not be removed or altered. 34 */ 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <ctype.h> 40 #include <errno.h> 41 #include "file.h" 42 43 #define EATAB {while (isascii((unsigned char) *l) && \ 44 isspace((unsigned char) *l)) ++l;} 45 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 46 tolower((unsigned char) (l)) : (l)) 47 48 49 static int getvalue __P((struct magic *, char **)); 50 static int hextoint __P((int)); 51 static char *getstr __P((char *, char *, int, int *)); 52 static int parse __P((char *, int *, int)); 53 static void eatsize __P((char **)); 54 55 static int maxmagic = 0; 56 57 static int apprentice_1 __P((char *, int)); 58 59 /* 60 * init_magic - read magic file and set up mapping 61 * based on the original apprentice() 62 */ 63 int 64 init_magic(fn) 65 char *fn; /* list of magic files */ 66 { 67 maxmagic = MAXMAGIS; 68 magic = (struct magic *) calloc(sizeof(struct magic), maxmagic); 69 if (magic == NULL) 70 return -1; 71 72 return(apprentice_1(fn, 0)); 73 } 74 75 static int 76 apprentice_1(fn, check) 77 char *fn; /* name of magic file */ 78 int check; /* non-zero? checking-only run. */ 79 { 80 static const char hdr[] = 81 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 82 FILE *f; 83 char line[BUFSIZ+1]; 84 int errs = 0; 85 86 f = fopen(fn, "r"); 87 if (f==NULL) { 88 return -1; 89 } 90 91 /* parse it */ 92 if (check) /* print silly verbose header for USG compat. */ 93 (void) printf("%s\n", hdr); 94 95 for (lineno = 1;fgets(line, sizeof(line), f) != NULL; lineno++) { 96 if (line[0]=='#') /* comment, do not parse */ 97 continue; 98 /* delete newline */ 99 line[strcspn(line, "\n")] = '\0'; 100 if (line[0] == '\0') 101 continue; 102 if (parse(line, &nmagic, check) != 0) 103 errs = 1; 104 } 105 106 (void) fclose(f); 107 return errs; 108 } 109 110 /* 111 * extend the sign bit if the comparison is to be signed 112 */ 113 uint32 114 signextend(m, v) 115 struct magic *m; 116 uint32 v; 117 { 118 if (!(m->flag & UNSIGNED)) 119 switch(m->type) { 120 /* 121 * Do not remove the casts below. They are 122 * vital. When later compared with the data, 123 * the sign extension must have happened. 124 */ 125 case BYTE: 126 v = (char) v; 127 break; 128 case SHORT: 129 case BESHORT: 130 case LESHORT: 131 v = (short) v; 132 break; 133 case DATE: 134 case BEDATE: 135 case LEDATE: 136 case LONG: 137 case BELONG: 138 case LELONG: 139 v = (int32) v; 140 break; 141 case STRING: 142 break; 143 default: 144 return -1; 145 } 146 return v; 147 } 148 149 /* 150 * parse one line from magic file, put into magic[index++] if valid 151 */ 152 static int 153 parse(l, ndx, check) 154 char *l; 155 int *ndx, check; 156 { 157 int i = 0, nd = *ndx; 158 struct magic *m; 159 char *t, *s; 160 161 #define ALLOC_INCR 20 162 if (nd+1 >= maxmagic){ 163 maxmagic += ALLOC_INCR; 164 if ((magic = (struct magic *) realloc(magic, 165 sizeof(struct magic) * 166 maxmagic)) == NULL) { 167 (void) fprintf(stderr, "%s: Out of memory.\n", progname); 168 if (check) 169 return -1; 170 else 171 exit(1); 172 } 173 memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR); 174 } 175 m = &magic[*ndx]; 176 m->flag = 0; 177 m->cont_level = 0; 178 179 while (*l == '>') { 180 ++l; /* step over */ 181 m->cont_level++; 182 } 183 184 if (m->cont_level != 0 && *l == '(') { 185 ++l; /* step over */ 186 m->flag |= INDIR; 187 } 188 if (m->cont_level != 0 && *l == '&') { 189 ++l; /* step over */ 190 m->flag |= ADD; 191 } 192 193 /* get offset, then skip over it */ 194 m->offset = (int) strtoul(l,&t,0); 195 /* 196 if (l == t) 197 magwarn("offset %s invalid", l); 198 */ 199 l = t; 200 201 if (m->flag & INDIR) { 202 m->in.type = LONG; 203 m->in.offset = 0; 204 /* 205 * read [.lbs][+-]nnnnn) 206 */ 207 if (*l == '.') { 208 l++; 209 switch (LOWCASE(*l)) { 210 case 'l': 211 m->in.type = LONG; 212 break; 213 case 'h': 214 case 's': 215 m->in.type = SHORT; 216 break; 217 case 'c': 218 case 'b': 219 m->in.type = BYTE; 220 break; 221 default: 222 break; 223 } 224 l++; 225 } 226 s = l; 227 if (*l == '+' || *l == '-') l++; 228 if (isdigit((unsigned char)*l)) { 229 m->in.offset = strtoul(l, &t, 0); 230 if (*s == '-') m->in.offset = - m->in.offset; 231 } 232 else 233 t = l; 234 /* 235 if (*t++ != ')') 236 magwarn("missing ')' in indirect offset"); 237 */ 238 l = t; 239 } 240 241 242 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) 243 ++l; 244 EATAB; 245 246 #define NBYTE 4 247 #define NSHORT 5 248 #define NLONG 4 249 #define NSTRING 6 250 #define NDATE 4 251 #define NBESHORT 7 252 #define NBELONG 6 253 #define NBEDATE 6 254 #define NLESHORT 7 255 #define NLELONG 6 256 #define NLEDATE 6 257 258 if (*l == 'u') { 259 ++l; 260 m->flag |= UNSIGNED; 261 } 262 263 /* get type, skip it */ 264 if (strncmp(l, "byte", NBYTE)==0) { 265 m->type = BYTE; 266 l += NBYTE; 267 } else if (strncmp(l, "short", NSHORT)==0) { 268 m->type = SHORT; 269 l += NSHORT; 270 } else if (strncmp(l, "long", NLONG)==0) { 271 m->type = LONG; 272 l += NLONG; 273 } else if (strncmp(l, "string", NSTRING)==0) { 274 m->type = STRING; 275 l += NSTRING; 276 } else if (strncmp(l, "date", NDATE)==0) { 277 m->type = DATE; 278 l += NDATE; 279 } else if (strncmp(l, "beshort", NBESHORT)==0) { 280 m->type = BESHORT; 281 l += NBESHORT; 282 } else if (strncmp(l, "belong", NBELONG)==0) { 283 m->type = BELONG; 284 l += NBELONG; 285 } else if (strncmp(l, "bedate", NBEDATE)==0) { 286 m->type = BEDATE; 287 l += NBEDATE; 288 } else if (strncmp(l, "leshort", NLESHORT)==0) { 289 m->type = LESHORT; 290 l += NLESHORT; 291 } else if (strncmp(l, "lelong", NLELONG)==0) { 292 m->type = LELONG; 293 l += NLELONG; 294 } else if (strncmp(l, "ledate", NLEDATE)==0) { 295 m->type = LEDATE; 296 l += NLEDATE; 297 } else { 298 return -1; 299 } 300 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 301 if (*l == '&') { 302 ++l; 303 m->mask = signextend(m, strtoul(l, &l, 0)); 304 eatsize(&l); 305 } else 306 m->mask = ~0L; 307 EATAB; 308 309 switch (*l) { 310 case '>': 311 case '<': 312 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 313 case '&': 314 case '^': 315 case '=': 316 m->reln = *l; 317 ++l; 318 break; 319 case '!': 320 if (m->type != STRING) { 321 m->reln = *l; 322 ++l; 323 break; 324 } 325 /* FALL THROUGH */ 326 default: 327 if (*l == 'x' && isascii((unsigned char)l[1]) && 328 isspace((unsigned char)l[1])) { 329 m->reln = *l; 330 ++l; 331 goto GetDesc; /* Bill The Cat */ 332 } 333 m->reln = '='; 334 break; 335 } 336 EATAB; 337 338 if (getvalue(m, &l)) 339 return -1; 340 /* 341 * TODO finish this macro and start using it! 342 * #define offsetcheck {if (offset > HOWMANY-1) 343 * magwarn("offset too big"); } 344 */ 345 346 /* 347 * now get last part - the description 348 */ 349 GetDesc: 350 EATAB; 351 if (l[0] == '\b') { 352 ++l; 353 m->nospflag = 1; 354 } else if ((l[0] == '\\') && (l[1] == 'b')) { 355 ++l; 356 ++l; 357 m->nospflag = 1; 358 } else 359 m->nospflag = 0; 360 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC) 361 /* NULLBODY */; 362 363 ++(*ndx); /* make room for next */ 364 return 0; 365 } 366 367 /* 368 * Read a numeric value from a pointer, into the value union of a magic 369 * pointer, according to the magic type. Update the string pointer to point 370 * just after the number read. Return 0 for success, non-zero for failure. 371 */ 372 static int 373 getvalue(m, p) 374 struct magic *m; 375 char **p; 376 { 377 int slen; 378 379 if (m->type == STRING) { 380 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); 381 m->vallen = slen; 382 } else 383 if (m->reln != 'x') { 384 m->value.l = signextend(m, strtoul(*p, p, 0)); 385 eatsize(p); 386 } 387 return 0; 388 } 389 390 /* 391 * Convert a string containing C character escapes. Stop at an unescaped 392 * space or tab. 393 * Copy the converted version to "p", returning its length in *slen. 394 * Return updated scan pointer as function result. 395 */ 396 static char * 397 getstr(s, p, plen, slen) 398 register char *s; 399 register char *p; 400 int plen, *slen; 401 { 402 char *origs = s, *origp = p; 403 char *pmax = p + plen - 1; 404 register int c; 405 register int val; 406 407 while ((c = *s++) != '\0') { 408 if (isspace((unsigned char) c)) 409 break; 410 if (p >= pmax) { 411 fprintf(stderr, "String too long: %s\n", origs); 412 break; 413 } 414 if(c == '\\') { 415 switch(c = *s++) { 416 417 case '\0': 418 goto out; 419 420 default: 421 *p++ = (char) c; 422 break; 423 424 case 'n': 425 *p++ = '\n'; 426 break; 427 428 case 'r': 429 *p++ = '\r'; 430 break; 431 432 case 'b': 433 *p++ = '\b'; 434 break; 435 436 case 't': 437 *p++ = '\t'; 438 break; 439 440 case 'f': 441 *p++ = '\f'; 442 break; 443 444 case 'v': 445 *p++ = '\v'; 446 break; 447 448 /* \ and up to 3 octal digits */ 449 case '0': 450 case '1': 451 case '2': 452 case '3': 453 case '4': 454 case '5': 455 case '6': 456 case '7': 457 val = c - '0'; 458 c = *s++; /* try for 2 */ 459 if(c >= '0' && c <= '7') { 460 val = (val<<3) | (c - '0'); 461 c = *s++; /* try for 3 */ 462 if(c >= '0' && c <= '7') 463 val = (val<<3) | (c-'0'); 464 else 465 --s; 466 } 467 else 468 --s; 469 *p++ = (char)val; 470 break; 471 472 /* \x and up to 2 hex digits */ 473 case 'x': 474 val = 'x'; /* Default if no digits */ 475 c = hextoint(*s++); /* Get next char */ 476 if (c >= 0) { 477 val = c; 478 c = hextoint(*s++); 479 if (c >= 0) 480 val = (val << 4) + c; 481 else 482 --s; 483 } else 484 --s; 485 *p++ = (char)val; 486 break; 487 } 488 } else 489 *p++ = (char)c; 490 } 491 out: 492 *p = '\0'; 493 *slen = p - origp; 494 return s; 495 } 496 497 498 /* Single hex char to int; -1 if not a hex char. */ 499 static int 500 hextoint(c) 501 int c; 502 { 503 if (!isascii((unsigned char) c)) return -1; 504 if (isdigit((unsigned char) c)) return c - '0'; 505 if ((c>='a')&&(c<='f')) return c + 10 - 'a'; 506 if ((c>='A')&&(c<='F')) return c + 10 - 'A'; 507 return -1; 508 } 509 510 511 /* 512 * Print a string containing C character escapes. 513 */ 514 void 515 showstr(fp, s, len) 516 FILE *fp; 517 const char *s; 518 int len; 519 { 520 register char c; 521 522 for (;;) { 523 c = *s++; 524 if (len == -1) { 525 if (c == '\0') 526 break; 527 } 528 else { 529 if (len-- == 0) 530 break; 531 } 532 if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 533 (void) fputc(c, fp); 534 else { 535 (void) fputc('\\', fp); 536 switch (c) { 537 538 case '\n': 539 (void) fputc('n', fp); 540 break; 541 542 case '\r': 543 (void) fputc('r', fp); 544 break; 545 546 case '\b': 547 (void) fputc('b', fp); 548 break; 549 550 case '\t': 551 (void) fputc('t', fp); 552 break; 553 554 case '\f': 555 (void) fputc('f', fp); 556 break; 557 558 case '\v': 559 (void) fputc('v', fp); 560 break; 561 562 default: 563 (void) fprintf(fp, "%.3o", c & 0377); 564 break; 565 } 566 } 567 } 568 } 569 570 /* 571 * eatsize(): Eat the size spec from a number [eg. 10UL] 572 */ 573 static void 574 eatsize(p) 575 char **p; 576 { 577 char *l = *p; 578 579 if (LOWCASE(*l) == 'u') 580 l++; 581 582 switch (LOWCASE(*l)) { 583 case 'l': /* long */ 584 case 's': /* short */ 585 case 'h': /* short */ 586 case 'b': /* char/byte */ 587 case 'c': /* char/byte */ 588 l++; 589 /*FALLTHROUGH*/ 590 default: 591 break; 592 } 593 594 *p = l; 595 } 596