1 /* $OpenBSD: magic-load.c,v 1.26 2017/07/02 10:58:15 brynet Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <limits.h> 25 #include <regex.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "magic.h" 32 #include "xmalloc.h" 33 34 static int 35 magic_odigit(u_char c) 36 { 37 if (c >= '0' && c <= '7') 38 return (c - '0'); 39 return (-1); 40 } 41 42 static int 43 magic_xdigit(u_char c) 44 { 45 if (c >= '0' && c <= '9') 46 return (c - '0'); 47 if (c >= 'a' && c <= 'f') 48 return (10 + c - 'a'); 49 if (c >= 'A' && c <= 'F') 50 return (10 + c - 'A'); 51 return (-1); 52 } 53 54 static void 55 magic_mark_text(struct magic_line *ml, int text) 56 { 57 do { 58 ml->text = text; 59 ml = ml->parent; 60 } while (ml != NULL); 61 } 62 63 static int 64 magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re, 65 const char *p) 66 { 67 int error; 68 char errbuf[256]; 69 70 error = regcomp(re, p, REG_EXTENDED|REG_NOSUB); 71 if (error != 0) { 72 regerror(error, re, errbuf, sizeof errbuf); 73 magic_warn(ml, "bad %s pattern: %s", name, errbuf); 74 return (-1); 75 } 76 return (0); 77 } 78 79 static int 80 magic_set_result(struct magic_line *ml, const char *s) 81 { 82 const char *fmt, *endfmt, *cp; 83 regex_t *re = NULL; 84 regmatch_t pmatch; 85 size_t fmtlen; 86 87 while (isspace((u_char)*s)) 88 s++; 89 if (*s == '\0') { 90 ml->result = NULL; 91 return (0); 92 } 93 ml->result = xstrdup(s); 94 95 fmt = NULL; 96 for (cp = s; *cp != '\0'; cp++) { 97 if (cp[0] == '%' && cp[1] != '%') { 98 if (fmt != NULL) { 99 magic_warn(ml, "multiple formats"); 100 return (-1); 101 } 102 fmt = cp; 103 } 104 } 105 if (fmt == NULL) 106 return (0); 107 fmt++; 108 109 for (endfmt = fmt; *endfmt != '\0'; endfmt++) { 110 if (strchr("diouxXeEfFgGsc", *endfmt) != NULL) 111 break; 112 } 113 if (*endfmt == '\0') { 114 magic_warn(ml, "unterminated format"); 115 return (-1); 116 } 117 fmtlen = endfmt + 1 - fmt; 118 if (fmtlen > 32) { 119 magic_warn(ml, "format too long"); 120 return (-1); 121 } 122 123 if (*endfmt == 's') { 124 switch (ml->type) { 125 case MAGIC_TYPE_DATE: 126 case MAGIC_TYPE_LDATE: 127 case MAGIC_TYPE_UDATE: 128 case MAGIC_TYPE_ULDATE: 129 case MAGIC_TYPE_BEDATE: 130 case MAGIC_TYPE_BELDATE: 131 case MAGIC_TYPE_UBEDATE: 132 case MAGIC_TYPE_UBELDATE: 133 case MAGIC_TYPE_QDATE: 134 case MAGIC_TYPE_QLDATE: 135 case MAGIC_TYPE_UQDATE: 136 case MAGIC_TYPE_UQLDATE: 137 case MAGIC_TYPE_BEQDATE: 138 case MAGIC_TYPE_BEQLDATE: 139 case MAGIC_TYPE_UBEQDATE: 140 case MAGIC_TYPE_UBEQLDATE: 141 case MAGIC_TYPE_LEQDATE: 142 case MAGIC_TYPE_LEQLDATE: 143 case MAGIC_TYPE_ULEQDATE: 144 case MAGIC_TYPE_ULEQLDATE: 145 case MAGIC_TYPE_LEDATE: 146 case MAGIC_TYPE_LELDATE: 147 case MAGIC_TYPE_ULEDATE: 148 case MAGIC_TYPE_ULELDATE: 149 case MAGIC_TYPE_MEDATE: 150 case MAGIC_TYPE_MELDATE: 151 case MAGIC_TYPE_STRING: 152 case MAGIC_TYPE_PSTRING: 153 case MAGIC_TYPE_BESTRING16: 154 case MAGIC_TYPE_LESTRING16: 155 case MAGIC_TYPE_REGEX: 156 case MAGIC_TYPE_SEARCH: 157 break; 158 default: 159 ml->stringify = 1; 160 break; 161 } 162 } 163 164 if (!ml->root->compiled) { 165 /* 166 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms 167 * with byte, short, long. We get lucky because our first and 168 * only argument ends up in a register. Accept it for now. 169 */ 170 if (magic_make_pattern(ml, "short", &ml->root->format_short, 171 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0) 172 return (-1); 173 if (magic_make_pattern(ml, "long", &ml->root->format_long, 174 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0) 175 return (-1); 176 if (magic_make_pattern(ml, "quad", &ml->root->format_quad, 177 "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0) 178 return (-1); 179 if (magic_make_pattern(ml, "float", &ml->root->format_float, 180 "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0) 181 return (-1); 182 if (magic_make_pattern(ml, "string", &ml->root->format_string, 183 "^-?[0-9]*(\\.[0-9]*)?s$") != 0) 184 return (-1); 185 ml->root->compiled = 1; 186 } 187 188 if (ml->stringify) 189 re = &ml->root->format_string; 190 else { 191 switch (ml->type) { 192 case MAGIC_TYPE_NONE: 193 case MAGIC_TYPE_BESTRING16: 194 case MAGIC_TYPE_LESTRING16: 195 case MAGIC_TYPE_NAME: 196 case MAGIC_TYPE_USE: 197 return (0); /* don't use result */ 198 case MAGIC_TYPE_BYTE: 199 case MAGIC_TYPE_UBYTE: 200 case MAGIC_TYPE_SHORT: 201 case MAGIC_TYPE_USHORT: 202 case MAGIC_TYPE_BESHORT: 203 case MAGIC_TYPE_UBESHORT: 204 case MAGIC_TYPE_LESHORT: 205 case MAGIC_TYPE_ULESHORT: 206 re = &ml->root->format_short; 207 break; 208 case MAGIC_TYPE_LONG: 209 case MAGIC_TYPE_ULONG: 210 case MAGIC_TYPE_BELONG: 211 case MAGIC_TYPE_UBELONG: 212 case MAGIC_TYPE_LELONG: 213 case MAGIC_TYPE_ULELONG: 214 case MAGIC_TYPE_MELONG: 215 re = &ml->root->format_long; 216 break; 217 case MAGIC_TYPE_QUAD: 218 case MAGIC_TYPE_UQUAD: 219 case MAGIC_TYPE_BEQUAD: 220 case MAGIC_TYPE_UBEQUAD: 221 case MAGIC_TYPE_LEQUAD: 222 case MAGIC_TYPE_ULEQUAD: 223 re = &ml->root->format_quad; 224 break; 225 case MAGIC_TYPE_FLOAT: 226 case MAGIC_TYPE_BEFLOAT: 227 case MAGIC_TYPE_LEFLOAT: 228 case MAGIC_TYPE_DOUBLE: 229 case MAGIC_TYPE_BEDOUBLE: 230 case MAGIC_TYPE_LEDOUBLE: 231 re = &ml->root->format_float; 232 break; 233 case MAGIC_TYPE_DATE: 234 case MAGIC_TYPE_LDATE: 235 case MAGIC_TYPE_UDATE: 236 case MAGIC_TYPE_ULDATE: 237 case MAGIC_TYPE_BEDATE: 238 case MAGIC_TYPE_BELDATE: 239 case MAGIC_TYPE_UBEDATE: 240 case MAGIC_TYPE_UBELDATE: 241 case MAGIC_TYPE_QDATE: 242 case MAGIC_TYPE_QLDATE: 243 case MAGIC_TYPE_UQDATE: 244 case MAGIC_TYPE_UQLDATE: 245 case MAGIC_TYPE_BEQDATE: 246 case MAGIC_TYPE_BEQLDATE: 247 case MAGIC_TYPE_UBEQDATE: 248 case MAGIC_TYPE_UBEQLDATE: 249 case MAGIC_TYPE_LEQDATE: 250 case MAGIC_TYPE_LEQLDATE: 251 case MAGIC_TYPE_ULEQDATE: 252 case MAGIC_TYPE_ULEQLDATE: 253 case MAGIC_TYPE_LEDATE: 254 case MAGIC_TYPE_LELDATE: 255 case MAGIC_TYPE_ULEDATE: 256 case MAGIC_TYPE_ULELDATE: 257 case MAGIC_TYPE_MEDATE: 258 case MAGIC_TYPE_MELDATE: 259 case MAGIC_TYPE_STRING: 260 case MAGIC_TYPE_PSTRING: 261 case MAGIC_TYPE_REGEX: 262 case MAGIC_TYPE_SEARCH: 263 case MAGIC_TYPE_DEFAULT: 264 case MAGIC_TYPE_CLEAR: 265 re = &ml->root->format_string; 266 break; 267 } 268 } 269 270 pmatch.rm_so = 0; 271 pmatch.rm_eo = fmtlen; 272 if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) { 273 magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string, 274 (int)fmtlen, fmt); 275 return (-1); 276 } 277 278 return (0); 279 } 280 281 static u_int 282 magic_get_strength(struct magic_line *ml) 283 { 284 int n; 285 size_t size; 286 287 if (ml->type == MAGIC_TYPE_NONE) 288 return (0); 289 290 if (ml->test_not || ml->test_operator == 'x') { 291 n = 1; 292 goto skip; 293 } 294 295 n = 2 * MAGIC_STRENGTH_MULTIPLIER; 296 switch (ml->type) { 297 case MAGIC_TYPE_NONE: 298 case MAGIC_TYPE_DEFAULT: 299 return (0); 300 case MAGIC_TYPE_CLEAR: 301 case MAGIC_TYPE_NAME: 302 case MAGIC_TYPE_USE: 303 break; 304 case MAGIC_TYPE_BYTE: 305 case MAGIC_TYPE_UBYTE: 306 n += 1 * MAGIC_STRENGTH_MULTIPLIER; 307 break; 308 case MAGIC_TYPE_SHORT: 309 case MAGIC_TYPE_USHORT: 310 case MAGIC_TYPE_BESHORT: 311 case MAGIC_TYPE_UBESHORT: 312 case MAGIC_TYPE_LESHORT: 313 case MAGIC_TYPE_ULESHORT: 314 n += 2 * MAGIC_STRENGTH_MULTIPLIER; 315 break; 316 case MAGIC_TYPE_LONG: 317 case MAGIC_TYPE_ULONG: 318 case MAGIC_TYPE_FLOAT: 319 case MAGIC_TYPE_DATE: 320 case MAGIC_TYPE_LDATE: 321 case MAGIC_TYPE_UDATE: 322 case MAGIC_TYPE_ULDATE: 323 case MAGIC_TYPE_BELONG: 324 case MAGIC_TYPE_UBELONG: 325 case MAGIC_TYPE_BEFLOAT: 326 case MAGIC_TYPE_BEDATE: 327 case MAGIC_TYPE_BELDATE: 328 case MAGIC_TYPE_UBEDATE: 329 case MAGIC_TYPE_UBELDATE: 330 n += 4 * MAGIC_STRENGTH_MULTIPLIER; 331 break; 332 case MAGIC_TYPE_QUAD: 333 case MAGIC_TYPE_UQUAD: 334 case MAGIC_TYPE_DOUBLE: 335 case MAGIC_TYPE_QDATE: 336 case MAGIC_TYPE_QLDATE: 337 case MAGIC_TYPE_UQDATE: 338 case MAGIC_TYPE_UQLDATE: 339 case MAGIC_TYPE_BEQUAD: 340 case MAGIC_TYPE_UBEQUAD: 341 case MAGIC_TYPE_BEDOUBLE: 342 case MAGIC_TYPE_BEQDATE: 343 case MAGIC_TYPE_BEQLDATE: 344 case MAGIC_TYPE_UBEQDATE: 345 case MAGIC_TYPE_UBEQLDATE: 346 case MAGIC_TYPE_LEQUAD: 347 case MAGIC_TYPE_ULEQUAD: 348 case MAGIC_TYPE_LEDOUBLE: 349 case MAGIC_TYPE_LEQDATE: 350 case MAGIC_TYPE_LEQLDATE: 351 case MAGIC_TYPE_ULEQDATE: 352 case MAGIC_TYPE_ULEQLDATE: 353 case MAGIC_TYPE_LELONG: 354 case MAGIC_TYPE_ULELONG: 355 case MAGIC_TYPE_LEFLOAT: 356 case MAGIC_TYPE_LEDATE: 357 case MAGIC_TYPE_LELDATE: 358 case MAGIC_TYPE_ULEDATE: 359 case MAGIC_TYPE_ULELDATE: 360 case MAGIC_TYPE_MELONG: 361 case MAGIC_TYPE_MEDATE: 362 case MAGIC_TYPE_MELDATE: 363 n += 8 * MAGIC_STRENGTH_MULTIPLIER; 364 break; 365 case MAGIC_TYPE_STRING: 366 case MAGIC_TYPE_PSTRING: 367 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER; 368 break; 369 case MAGIC_TYPE_BESTRING16: 370 case MAGIC_TYPE_LESTRING16: 371 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2; 372 break; 373 case MAGIC_TYPE_REGEX: 374 case MAGIC_TYPE_SEARCH: 375 size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size; 376 if (size < 1) 377 size = 1; 378 n += ml->test_string_size * size; 379 break; 380 } 381 switch (ml->test_operator) { 382 case '=': 383 n += MAGIC_STRENGTH_MULTIPLIER; 384 break; 385 case '<': 386 case '>': 387 case '[': 388 case ']': 389 n -= 2 * MAGIC_STRENGTH_MULTIPLIER; 390 break; 391 case '^': 392 case '&': 393 n -= MAGIC_STRENGTH_MULTIPLIER; 394 break; 395 } 396 397 skip: 398 switch (ml->strength_operator) { 399 case '+': 400 n += ml->strength_value; 401 break; 402 case '-': 403 n -= ml->strength_value; 404 break; 405 case '*': 406 n *= ml->strength_value; 407 break; 408 case '/': 409 n /= ml->strength_value; 410 break; 411 } 412 return (n <= 0 ? 1 : n); 413 } 414 415 static int 416 magic_get_string(char **line, char *out, size_t *outlen) 417 { 418 char *start, *cp, c; 419 int d0, d1, d2; 420 421 start = out; 422 for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) { 423 if (*cp != '\\') { 424 *out++ = *cp; 425 continue; 426 } 427 428 switch (c = *++cp) { 429 case '\0': /* end of line */ 430 return (-1); 431 case ' ': 432 *out++ = ' '; 433 break; 434 case '0': 435 case '1': 436 case '2': 437 case '3': 438 case '4': 439 case '5': 440 case '6': 441 case '7': 442 d0 = magic_odigit(cp[0]); 443 if (cp[0] != '\0') 444 d1 = magic_odigit(cp[1]); 445 else 446 d1 = -1; 447 if (cp[0] != '\0' && cp[1] != '\0') 448 d2 = magic_odigit(cp[2]); 449 else 450 d2 = -1; 451 452 if (d0 != -1 && d1 != -1 && d2 != -1) { 453 *out = d2 | (d1 << 3) | (d0 << 6); 454 cp += 2; 455 } else if (d0 != -1 && d1 != -1) { 456 *out = d1 | (d0 << 3); 457 cp++; 458 } else if (d0 != -1) 459 *out = d0; 460 else 461 return (-1); 462 out++; 463 break; 464 case 'x': 465 d0 = magic_xdigit(cp[1]); 466 if (cp[1] != '\0') 467 d1 = magic_xdigit(cp[2]); 468 else 469 d1 = -1; 470 471 if (d0 != -1 && d1 != -1) { 472 *out = d1 | (d0 << 4); 473 cp += 2; 474 } else if (d0 != -1) { 475 *out = d0; 476 cp++; 477 } else 478 return (-1); 479 out++; 480 481 break; 482 case 'a': 483 *out++ = '\a'; 484 break; 485 case 'b': 486 *out++ = '\b'; 487 break; 488 case 't': 489 *out++ = '\t'; 490 break; 491 case 'f': 492 *out++ = '\f'; 493 break; 494 case 'n': 495 *out++ = '\n'; 496 break; 497 case 'r': 498 *out++ = '\r'; 499 break; 500 case '\\': 501 *out++ = '\\'; 502 break; 503 case '\'': 504 *out++ = '\''; 505 break; 506 case '\"': 507 *out++ = '\"'; 508 break; 509 default: 510 *out++ = c; 511 break; 512 } 513 } 514 *out = '\0'; 515 *outlen = out - start; 516 517 *line = cp; 518 return (0); 519 } 520 521 static int 522 magic_parse_offset(struct magic_line *ml, char **line) 523 { 524 char *copy, *s, *cp, *endptr; 525 526 while (isspace((u_char)**line)) 527 (*line)++; 528 copy = s = cp = xmalloc(strlen(*line) + 1); 529 while (**line != '\0' && !isspace((u_char)**line)) 530 *cp++ = *(*line)++; 531 *cp = '\0'; 532 533 ml->offset = 0; 534 ml->offset_relative = 0; 535 536 ml->indirect_type = ' '; 537 ml->indirect_relative = 0; 538 ml->indirect_offset = 0; 539 ml->indirect_operator = ' '; 540 ml->indirect_operand = 0; 541 542 if (*s == '&') { 543 ml->offset_relative = 1; 544 s++; 545 } 546 547 if (*s != '(') { 548 endptr = magic_strtoll(s, &ml->offset); 549 if (endptr == NULL || *endptr != '\0') { 550 magic_warn(ml, "missing closing bracket"); 551 goto fail; 552 } 553 if (ml->offset < 0 && !ml->offset_relative) { 554 magic_warn(ml, "negative absolute offset"); 555 goto fail; 556 } 557 goto done; 558 } 559 s++; 560 561 if (*s == '&') { 562 ml->indirect_relative = 1; 563 s++; 564 } 565 566 endptr = magic_strtoll(s, &ml->indirect_offset); 567 if (endptr == NULL) { 568 magic_warn(ml, "can't parse offset: %s", s); 569 goto fail; 570 } 571 s = endptr; 572 if (*s == ')') 573 goto done; 574 575 if (*s == '.') { 576 s++; 577 if (*s == '\0' || strchr("bslBSL", *s) == NULL) { 578 magic_warn(ml, "unknown offset type: %c", *s); 579 goto fail; 580 } 581 ml->indirect_type = *s; 582 s++; 583 if (*s == ')') 584 goto done; 585 } 586 587 if (*s == '\0' || strchr("+-*", *s) == NULL) { 588 magic_warn(ml, "unknown offset operator: %c", *s); 589 goto fail; 590 } 591 ml->indirect_operator = *s; 592 s++; 593 if (*s == ')') 594 goto done; 595 596 if (*s == '(') { 597 s++; 598 endptr = magic_strtoll(s, &ml->indirect_operand); 599 if (endptr == NULL || *endptr != ')') { 600 magic_warn(ml, "missing closing bracket"); 601 goto fail; 602 } 603 if (*++endptr != ')') { 604 magic_warn(ml, "missing closing bracket"); 605 goto fail; 606 } 607 } else { 608 endptr = magic_strtoll(s, &ml->indirect_operand); 609 if (endptr == NULL || *endptr != ')') { 610 magic_warn(ml, "missing closing bracket"); 611 goto fail; 612 } 613 } 614 615 done: 616 free(copy); 617 return (0); 618 619 fail: 620 free(copy); 621 return (-1); 622 } 623 624 static int 625 magic_parse_type(struct magic_line *ml, char **line) 626 { 627 char *copy, *s, *cp, *endptr; 628 629 while (isspace((u_char)**line)) 630 (*line)++; 631 copy = s = cp = xmalloc(strlen(*line) + 1); 632 while (**line != '\0' && !isspace((u_char)**line)) 633 *cp++ = *(*line)++; 634 *cp = '\0'; 635 636 ml->type = MAGIC_TYPE_NONE; 637 ml->type_operator = ' '; 638 ml->type_operand = 0; 639 640 if (strcmp(s, "name") == 0) { 641 ml->type = MAGIC_TYPE_NAME; 642 ml->type_string = xstrdup(s); 643 goto done; 644 } 645 if (strcmp(s, "use") == 0) { 646 ml->type = MAGIC_TYPE_USE; 647 ml->type_string = xstrdup(s); 648 goto done; 649 } 650 651 if (strncmp(s, "string", (sizeof "string") - 1) == 0 || 652 strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) { 653 if (*s == 'u') 654 ml->type_string = xstrdup(s + 1); 655 else 656 ml->type_string = xstrdup(s); 657 ml->type = MAGIC_TYPE_STRING; 658 magic_mark_text(ml, 0); 659 goto done; 660 } 661 if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 || 662 strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) { 663 if (*s == 'u') 664 ml->type_string = xstrdup(s + 1); 665 else 666 ml->type_string = xstrdup(s); 667 ml->type = MAGIC_TYPE_PSTRING; 668 magic_mark_text(ml, 0); 669 goto done; 670 } 671 if (strncmp(s, "search", (sizeof "search") - 1) == 0 || 672 strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) { 673 if (*s == 'u') 674 ml->type_string = xstrdup(s + 1); 675 else 676 ml->type_string = xstrdup(s); 677 ml->type = MAGIC_TYPE_SEARCH; 678 goto done; 679 } 680 if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 || 681 strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) { 682 if (*s == 'u') 683 ml->type_string = xstrdup(s + 1); 684 else 685 ml->type_string = xstrdup(s); 686 ml->type = MAGIC_TYPE_REGEX; 687 goto done; 688 } 689 ml->type_string = xstrdup(s); 690 691 cp = &s[strcspn(s, "+-&/%*")]; 692 if (*cp != '\0') { 693 ml->type_operator = *cp; 694 endptr = magic_strtoull(cp + 1, &ml->type_operand); 695 if (endptr == NULL || *endptr != '\0') { 696 magic_warn(ml, "can't parse operand: %s", cp + 1); 697 goto fail; 698 } 699 *cp = '\0'; 700 } 701 702 if (strcmp(s, "byte") == 0) 703 ml->type = MAGIC_TYPE_BYTE; 704 else if (strcmp(s, "short") == 0) 705 ml->type = MAGIC_TYPE_SHORT; 706 else if (strcmp(s, "long") == 0) 707 ml->type = MAGIC_TYPE_LONG; 708 else if (strcmp(s, "quad") == 0) 709 ml->type = MAGIC_TYPE_QUAD; 710 else if (strcmp(s, "ubyte") == 0) 711 ml->type = MAGIC_TYPE_UBYTE; 712 else if (strcmp(s, "ushort") == 0) 713 ml->type = MAGIC_TYPE_USHORT; 714 else if (strcmp(s, "ulong") == 0) 715 ml->type = MAGIC_TYPE_ULONG; 716 else if (strcmp(s, "uquad") == 0) 717 ml->type = MAGIC_TYPE_UQUAD; 718 else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0) 719 ml->type = MAGIC_TYPE_FLOAT; 720 else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0) 721 ml->type = MAGIC_TYPE_DOUBLE; 722 else if (strcmp(s, "date") == 0) 723 ml->type = MAGIC_TYPE_DATE; 724 else if (strcmp(s, "qdate") == 0) 725 ml->type = MAGIC_TYPE_QDATE; 726 else if (strcmp(s, "ldate") == 0) 727 ml->type = MAGIC_TYPE_LDATE; 728 else if (strcmp(s, "qldate") == 0) 729 ml->type = MAGIC_TYPE_QLDATE; 730 else if (strcmp(s, "udate") == 0) 731 ml->type = MAGIC_TYPE_UDATE; 732 else if (strcmp(s, "uqdate") == 0) 733 ml->type = MAGIC_TYPE_UQDATE; 734 else if (strcmp(s, "uldate") == 0) 735 ml->type = MAGIC_TYPE_ULDATE; 736 else if (strcmp(s, "uqldate") == 0) 737 ml->type = MAGIC_TYPE_UQLDATE; 738 else if (strcmp(s, "beshort") == 0) 739 ml->type = MAGIC_TYPE_BESHORT; 740 else if (strcmp(s, "belong") == 0) 741 ml->type = MAGIC_TYPE_BELONG; 742 else if (strcmp(s, "bequad") == 0) 743 ml->type = MAGIC_TYPE_BEQUAD; 744 else if (strcmp(s, "ubeshort") == 0) 745 ml->type = MAGIC_TYPE_UBESHORT; 746 else if (strcmp(s, "ubelong") == 0) 747 ml->type = MAGIC_TYPE_UBELONG; 748 else if (strcmp(s, "ubequad") == 0) 749 ml->type = MAGIC_TYPE_UBEQUAD; 750 else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0) 751 ml->type = MAGIC_TYPE_BEFLOAT; 752 else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0) 753 ml->type = MAGIC_TYPE_BEDOUBLE; 754 else if (strcmp(s, "bedate") == 0) 755 ml->type = MAGIC_TYPE_BEDATE; 756 else if (strcmp(s, "beqdate") == 0) 757 ml->type = MAGIC_TYPE_BEQDATE; 758 else if (strcmp(s, "beldate") == 0) 759 ml->type = MAGIC_TYPE_BELDATE; 760 else if (strcmp(s, "beqldate") == 0) 761 ml->type = MAGIC_TYPE_BEQLDATE; 762 else if (strcmp(s, "ubedate") == 0) 763 ml->type = MAGIC_TYPE_UBEDATE; 764 else if (strcmp(s, "ubeqdate") == 0) 765 ml->type = MAGIC_TYPE_UBEQDATE; 766 else if (strcmp(s, "ubeldate") == 0) 767 ml->type = MAGIC_TYPE_UBELDATE; 768 else if (strcmp(s, "ubeqldate") == 0) 769 ml->type = MAGIC_TYPE_UBEQLDATE; 770 else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0) 771 ml->type = MAGIC_TYPE_BESTRING16; 772 else if (strcmp(s, "leshort") == 0) 773 ml->type = MAGIC_TYPE_LESHORT; 774 else if (strcmp(s, "lelong") == 0) 775 ml->type = MAGIC_TYPE_LELONG; 776 else if (strcmp(s, "lequad") == 0) 777 ml->type = MAGIC_TYPE_LEQUAD; 778 else if (strcmp(s, "uleshort") == 0) 779 ml->type = MAGIC_TYPE_ULESHORT; 780 else if (strcmp(s, "ulelong") == 0) 781 ml->type = MAGIC_TYPE_ULELONG; 782 else if (strcmp(s, "ulequad") == 0) 783 ml->type = MAGIC_TYPE_ULEQUAD; 784 else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0) 785 ml->type = MAGIC_TYPE_LEFLOAT; 786 else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0) 787 ml->type = MAGIC_TYPE_LEDOUBLE; 788 else if (strcmp(s, "ledate") == 0) 789 ml->type = MAGIC_TYPE_LEDATE; 790 else if (strcmp(s, "leqdate") == 0) 791 ml->type = MAGIC_TYPE_LEQDATE; 792 else if (strcmp(s, "leldate") == 0) 793 ml->type = MAGIC_TYPE_LELDATE; 794 else if (strcmp(s, "leqldate") == 0) 795 ml->type = MAGIC_TYPE_LEQLDATE; 796 else if (strcmp(s, "uledate") == 0) 797 ml->type = MAGIC_TYPE_ULEDATE; 798 else if (strcmp(s, "uleqdate") == 0) 799 ml->type = MAGIC_TYPE_ULEQDATE; 800 else if (strcmp(s, "uleldate") == 0) 801 ml->type = MAGIC_TYPE_ULELDATE; 802 else if (strcmp(s, "uleqldate") == 0) 803 ml->type = MAGIC_TYPE_ULEQLDATE; 804 else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0) 805 ml->type = MAGIC_TYPE_LESTRING16; 806 else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0) 807 ml->type = MAGIC_TYPE_MELONG; 808 else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0) 809 ml->type = MAGIC_TYPE_MEDATE; 810 else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0) 811 ml->type = MAGIC_TYPE_MELDATE; 812 else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0) 813 ml->type = MAGIC_TYPE_DEFAULT; 814 else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0) 815 ml->type = MAGIC_TYPE_CLEAR; 816 else { 817 magic_warn(ml, "unknown type: %s", s); 818 goto fail; 819 } 820 magic_mark_text(ml, 0); 821 822 done: 823 free(copy); 824 return (0); 825 826 fail: 827 free(copy); 828 return (-1); 829 } 830 831 static int 832 magic_parse_value(struct magic_line *ml, char **line) 833 { 834 char *copy, *s, *cp, *endptr; 835 size_t slen; 836 uint64_t u; 837 838 while (isspace((u_char)**line)) 839 (*line)++; 840 841 ml->test_operator = '='; 842 ml->test_not = 0; 843 ml->test_string = NULL; 844 ml->test_string_size = 0; 845 ml->test_unsigned = 0; 846 ml->test_signed = 0; 847 848 if (**line == '\0') 849 return (0); 850 851 s = *line; 852 if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) { 853 (*line)++; 854 ml->test_operator = 'x'; 855 return (0); 856 } 857 858 if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) { 859 magic_warn(ml, "test specified for default or clear"); 860 ml->test_operator = 'x'; 861 return (0); 862 } 863 864 if (**line == '!') { 865 ml->test_not = 1; 866 (*line)++; 867 } 868 869 switch (ml->type) { 870 case MAGIC_TYPE_NAME: 871 case MAGIC_TYPE_USE: 872 copy = s = xmalloc(strlen(*line) + 1); 873 if (magic_get_string(line, s, &slen) != 0 || slen == 0) { 874 magic_warn(ml, "can't parse string"); 875 goto fail; 876 } 877 if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) { 878 magic_warn(ml, "invalid name"); 879 goto fail; 880 } 881 ml->name = s; 882 return (0); /* do not free */ 883 case MAGIC_TYPE_STRING: 884 case MAGIC_TYPE_PSTRING: 885 case MAGIC_TYPE_SEARCH: 886 if (**line == '>' || **line == '<' || **line == '=') { 887 ml->test_operator = **line; 888 (*line)++; 889 } 890 /* FALLTHROUGH */ 891 case MAGIC_TYPE_REGEX: 892 if (**line == '=') 893 (*line)++; 894 copy = s = xmalloc(strlen(*line) + 1); 895 if (magic_get_string(line, s, &slen) != 0) { 896 magic_warn(ml, "can't parse string"); 897 goto fail; 898 } 899 ml->test_string_size = slen; 900 ml->test_string = s; 901 return (0); /* do not free */ 902 default: 903 break; 904 } 905 906 while (isspace((u_char)**line)) 907 (*line)++; 908 if ((*line)[0] == '<' && (*line)[1] == '=') { 909 ml->test_operator = '['; 910 (*line) += 2; 911 } else if ((*line)[0] == '>' && (*line)[1] == '=') { 912 ml->test_operator = ']'; 913 (*line) += 2; 914 } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) { 915 ml->test_operator = **line; 916 (*line)++; 917 } 918 919 while (isspace((u_char)**line)) 920 (*line)++; 921 copy = cp = xmalloc(strlen(*line) + 1); 922 while (**line != '\0' && !isspace((u_char)**line)) 923 *cp++ = *(*line)++; 924 *cp = '\0'; 925 926 switch (ml->type) { 927 case MAGIC_TYPE_FLOAT: 928 case MAGIC_TYPE_DOUBLE: 929 case MAGIC_TYPE_BEFLOAT: 930 case MAGIC_TYPE_BEDOUBLE: 931 case MAGIC_TYPE_LEFLOAT: 932 case MAGIC_TYPE_LEDOUBLE: 933 errno = 0; 934 ml->test_double = strtod(copy, &endptr); 935 if (errno == ERANGE) 936 endptr = NULL; 937 break; 938 default: 939 if (*ml->type_string == 'u') 940 endptr = magic_strtoull(copy, &ml->test_unsigned); 941 else { 942 endptr = magic_strtoll(copy, &ml->test_signed); 943 if (endptr == NULL || *endptr != '\0') { 944 /* 945 * If we can't parse this as a signed number, 946 * try as unsigned instead. 947 */ 948 endptr = magic_strtoull(copy, &u); 949 if (endptr != NULL && *endptr == '\0') 950 ml->test_signed = (int64_t)u; 951 } 952 } 953 break; 954 } 955 if (endptr == NULL || *endptr != '\0') { 956 magic_warn(ml, "can't parse number: %s", copy); 957 goto fail; 958 } 959 960 free(copy); 961 return (0); 962 963 fail: 964 free(copy); 965 return (-1); 966 } 967 968 int 969 magic_compare(struct magic_line *ml1, struct magic_line *ml2) 970 { 971 if (ml1->strength < ml2->strength) 972 return (1); 973 if (ml1->strength > ml2->strength) 974 return (-1); 975 976 /* 977 * The original file depends on the (undefined!) qsort(3) behaviour 978 * when the strength is equal. This is impossible to reproduce with an 979 * RB tree so just use the line number and hope for the best. 980 */ 981 if (ml1->line < ml2->line) 982 return (-1); 983 if (ml1->line > ml2->line) 984 return (1); 985 986 return (0); 987 } 988 RB_GENERATE(magic_tree, magic_line, node, magic_compare); 989 990 int 991 magic_named_compare(struct magic_line *ml1, struct magic_line *ml2) 992 { 993 return (strcmp(ml1->name, ml2->name)); 994 } 995 RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare); 996 997 static void 998 magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml, 999 char *line) 1000 { 1001 char *cp, *s; 1002 int64_t value; 1003 1004 cp = line + (sizeof "!:strength") - 1; 1005 while (isspace((u_char)*cp)) 1006 cp++; 1007 s = cp; 1008 1009 cp = strchr(s, '#'); 1010 if (cp != NULL) 1011 *cp = '\0'; 1012 cp = s; 1013 1014 if (*s == '\0' || strchr("+-*/", *s) == NULL) { 1015 magic_warnm(m, at, "invalid strength operator: %s", s); 1016 return; 1017 } 1018 ml->strength_operator = *cp++; 1019 1020 while (isspace((u_char)*cp)) 1021 cp++; 1022 cp = magic_strtoll(cp, &value); 1023 while (cp != NULL && isspace((u_char)*cp)) 1024 cp++; 1025 if (cp == NULL || *cp != '\0' || value < 0 || value > 255) { 1026 magic_warnm(m, at, "invalid strength value: %s", s); 1027 return; 1028 } 1029 ml->strength_value = value; 1030 } 1031 1032 static void 1033 magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line) 1034 { 1035 char *mimetype, *cp; 1036 1037 mimetype = line + (sizeof "!:mime") - 1; 1038 while (isspace((u_char)*mimetype)) 1039 mimetype++; 1040 1041 cp = strchr(mimetype, '#'); 1042 if (cp != NULL) 1043 *cp = '\0'; 1044 1045 if (*mimetype != '\0') { 1046 cp = mimetype + strlen(mimetype) - 1; 1047 while (cp != mimetype && isspace((u_char)*cp)) 1048 *cp-- = '\0'; 1049 } 1050 1051 cp = mimetype; 1052 while (*cp != '\0') { 1053 if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL) 1054 break; 1055 cp++; 1056 } 1057 if (*mimetype == '\0' || *cp != '\0') { 1058 magic_warnm(m, at, "invalid MIME type: %s", mimetype); 1059 return; 1060 } 1061 if (ml == NULL) { 1062 magic_warnm(m, at, "stray MIME type: %s", mimetype); 1063 return; 1064 } 1065 ml->mimetype = xstrdup(mimetype); 1066 } 1067 1068 struct magic * 1069 magic_load(FILE *f, const char *path, int warnings) 1070 { 1071 struct magic *m; 1072 struct magic_line *ml = NULL, *parent, *parent0; 1073 char *line, *tmp; 1074 size_t size; 1075 ssize_t slen; 1076 u_int at, level, n, i; 1077 1078 m = xcalloc(1, sizeof *m); 1079 m->path = xstrdup(path); 1080 m->warnings = warnings; 1081 RB_INIT(&m->tree); 1082 1083 parent = NULL; 1084 parent0 = NULL; 1085 level = 0; 1086 1087 at = 0; 1088 tmp = NULL; 1089 size = 0; 1090 while ((slen = getline(&tmp, &size, f)) != -1) { 1091 line = tmp; 1092 if (line[slen - 1] == '\n') 1093 line[slen - 1] = '\0'; 1094 1095 at++; 1096 1097 while (isspace((u_char)*line)) 1098 line++; 1099 if (*line == '\0' || *line == '#') 1100 continue; 1101 1102 if (strncmp (line, "!:mime", 6) == 0) { 1103 magic_set_mimetype(m, at, ml, line); 1104 continue; 1105 } 1106 if (strncmp (line, "!:strength", 10) == 0) { 1107 magic_adjust_strength(m, at, ml, line); 1108 continue; 1109 } 1110 if (strncmp (line, "!:", 2) == 0) { 1111 for (i = 0; i < 64 && line[i] != '\0'; i++) { 1112 if (isspace((u_char)line[i])) 1113 break; 1114 } 1115 magic_warnm(m, at, "%.*s not supported", i, line); 1116 continue; 1117 } 1118 1119 n = 0; 1120 for (; *line == '>'; line++) 1121 n++; 1122 1123 ml = xcalloc(1, sizeof *ml); 1124 ml->root = m; 1125 ml->line = at; 1126 ml->type = MAGIC_TYPE_NONE; 1127 TAILQ_INIT(&ml->children); 1128 ml->text = 1; 1129 1130 /* 1131 * At this point n is the level we want, level is the current 1132 * level. parent0 is the last line at the same level and parent 1133 * is the last line at the previous level. 1134 */ 1135 if (n == level + 1) { 1136 parent = parent0; 1137 } else if (n < level) { 1138 for (i = n; i < level && parent != NULL; i++) 1139 parent = parent->parent; 1140 } else if (n != level) { 1141 magic_warn(ml, "level skipped (%u->%u)", level, n); 1142 free(ml); 1143 continue; 1144 } 1145 ml->parent = parent; 1146 level = n; 1147 1148 if (magic_parse_offset(ml, &line) != 0 || 1149 magic_parse_type(ml, &line) != 0 || 1150 magic_parse_value(ml, &line) != 0 || 1151 magic_set_result(ml, line) != 0) { 1152 /* 1153 * An invalid line still needs to appear in the tree in 1154 * case it has any children. 1155 */ 1156 ml->type = MAGIC_TYPE_NONE; 1157 } 1158 1159 ml->strength = magic_get_strength(ml); 1160 if (ml->parent == NULL) { 1161 if (ml->name != NULL) 1162 RB_INSERT(magic_named_tree, &m->named, ml); 1163 else 1164 RB_INSERT(magic_tree, &m->tree, ml); 1165 } else 1166 TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry); 1167 parent0 = ml; 1168 } 1169 free(tmp); 1170 if (ferror(f)) 1171 err(1, "%s", path); 1172 1173 return (m); 1174 } 1175