1 /* $OpenBSD: magic-load.c,v 1.26 2017/07/02 10:58:15 brynet Exp $ */
2
3 /*
4 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/types.h>
20
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "magic.h"
32 #include "xmalloc.h"
33
/*
 * Return the numeric value of the octal digit character c, or -1 if c is
 * not in the range '0' to '7'.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
41
/*
 * Return the numeric value (0-15) of the hexadecimal digit character c,
 * accepting both upper and lower case letters, or -1 if c is not a
 * hexadecimal digit.
 */
static int
magic_xdigit(u_char c)
{
	int	v;

	v = c - '0';
	if (v >= 0 && v <= 9)
		return (v);
	v = c - 'a';
	if (v >= 0 && v <= 5)
		return (10 + v);
	v = c - 'A';
	if (v >= 0 && v <= 5)
		return (10 + v);
	return (-1);
}
53
54 static void
magic_mark_text(struct magic_line * ml,int text)55 magic_mark_text(struct magic_line *ml, int text)
56 {
57 do {
58 ml->text = text;
59 ml = ml->parent;
60 } while (ml != NULL);
61 }
62
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting). On failure a warning naming the pattern is issued against ml
 * and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	message[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, message, sizeof message);
	magic_warn(ml, "bad %s pattern: %s", name, message);
	return (-1);
}
78
79 static int
magic_set_result(struct magic_line * ml,const char * s)80 magic_set_result(struct magic_line *ml, const char *s)
81 {
82 const char *fmt, *endfmt, *cp;
83 regex_t *re = NULL;
84 regmatch_t pmatch;
85 size_t fmtlen;
86
87 while (isspace((u_char)*s))
88 s++;
89 if (*s == '\0') {
90 ml->result = NULL;
91 return (0);
92 }
93 ml->result = xstrdup(s);
94
95 fmt = NULL;
96 for (cp = s; *cp != '\0'; cp++) {
97 if (cp[0] == '%' && cp[1] != '%') {
98 if (fmt != NULL) {
99 magic_warn(ml, "multiple formats");
100 return (-1);
101 }
102 fmt = cp;
103 }
104 }
105 if (fmt == NULL)
106 return (0);
107 fmt++;
108
109 for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
110 if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
111 break;
112 }
113 if (*endfmt == '\0') {
114 magic_warn(ml, "unterminated format");
115 return (-1);
116 }
117 fmtlen = endfmt + 1 - fmt;
118 if (fmtlen > 32) {
119 magic_warn(ml, "format too long");
120 return (-1);
121 }
122
123 if (*endfmt == 's') {
124 switch (ml->type) {
125 case MAGIC_TYPE_DATE:
126 case MAGIC_TYPE_LDATE:
127 case MAGIC_TYPE_UDATE:
128 case MAGIC_TYPE_ULDATE:
129 case MAGIC_TYPE_BEDATE:
130 case MAGIC_TYPE_BELDATE:
131 case MAGIC_TYPE_UBEDATE:
132 case MAGIC_TYPE_UBELDATE:
133 case MAGIC_TYPE_QDATE:
134 case MAGIC_TYPE_QLDATE:
135 case MAGIC_TYPE_UQDATE:
136 case MAGIC_TYPE_UQLDATE:
137 case MAGIC_TYPE_BEQDATE:
138 case MAGIC_TYPE_BEQLDATE:
139 case MAGIC_TYPE_UBEQDATE:
140 case MAGIC_TYPE_UBEQLDATE:
141 case MAGIC_TYPE_LEQDATE:
142 case MAGIC_TYPE_LEQLDATE:
143 case MAGIC_TYPE_ULEQDATE:
144 case MAGIC_TYPE_ULEQLDATE:
145 case MAGIC_TYPE_LEDATE:
146 case MAGIC_TYPE_LELDATE:
147 case MAGIC_TYPE_ULEDATE:
148 case MAGIC_TYPE_ULELDATE:
149 case MAGIC_TYPE_MEDATE:
150 case MAGIC_TYPE_MELDATE:
151 case MAGIC_TYPE_STRING:
152 case MAGIC_TYPE_PSTRING:
153 case MAGIC_TYPE_BESTRING16:
154 case MAGIC_TYPE_LESTRING16:
155 case MAGIC_TYPE_REGEX:
156 case MAGIC_TYPE_SEARCH:
157 break;
158 default:
159 ml->stringify = 1;
160 break;
161 }
162 }
163
164 if (!ml->root->compiled) {
165 /*
166 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
167 * with byte, short, long. We get lucky because our first and
168 * only argument ends up in a register. Accept it for now.
169 */
170 if (magic_make_pattern(ml, "short", &ml->root->format_short,
171 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
172 return (-1);
173 if (magic_make_pattern(ml, "long", &ml->root->format_long,
174 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
175 return (-1);
176 if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
177 "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
178 return (-1);
179 if (magic_make_pattern(ml, "float", &ml->root->format_float,
180 "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
181 return (-1);
182 if (magic_make_pattern(ml, "string", &ml->root->format_string,
183 "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
184 return (-1);
185 ml->root->compiled = 1;
186 }
187
188 if (ml->stringify)
189 re = &ml->root->format_string;
190 else {
191 switch (ml->type) {
192 case MAGIC_TYPE_NONE:
193 case MAGIC_TYPE_BESTRING16:
194 case MAGIC_TYPE_LESTRING16:
195 case MAGIC_TYPE_NAME:
196 case MAGIC_TYPE_USE:
197 return (0); /* don't use result */
198 case MAGIC_TYPE_BYTE:
199 case MAGIC_TYPE_UBYTE:
200 case MAGIC_TYPE_SHORT:
201 case MAGIC_TYPE_USHORT:
202 case MAGIC_TYPE_BESHORT:
203 case MAGIC_TYPE_UBESHORT:
204 case MAGIC_TYPE_LESHORT:
205 case MAGIC_TYPE_ULESHORT:
206 re = &ml->root->format_short;
207 break;
208 case MAGIC_TYPE_LONG:
209 case MAGIC_TYPE_ULONG:
210 case MAGIC_TYPE_BELONG:
211 case MAGIC_TYPE_UBELONG:
212 case MAGIC_TYPE_LELONG:
213 case MAGIC_TYPE_ULELONG:
214 case MAGIC_TYPE_MELONG:
215 re = &ml->root->format_long;
216 break;
217 case MAGIC_TYPE_QUAD:
218 case MAGIC_TYPE_UQUAD:
219 case MAGIC_TYPE_BEQUAD:
220 case MAGIC_TYPE_UBEQUAD:
221 case MAGIC_TYPE_LEQUAD:
222 case MAGIC_TYPE_ULEQUAD:
223 re = &ml->root->format_quad;
224 break;
225 case MAGIC_TYPE_FLOAT:
226 case MAGIC_TYPE_BEFLOAT:
227 case MAGIC_TYPE_LEFLOAT:
228 case MAGIC_TYPE_DOUBLE:
229 case MAGIC_TYPE_BEDOUBLE:
230 case MAGIC_TYPE_LEDOUBLE:
231 re = &ml->root->format_float;
232 break;
233 case MAGIC_TYPE_DATE:
234 case MAGIC_TYPE_LDATE:
235 case MAGIC_TYPE_UDATE:
236 case MAGIC_TYPE_ULDATE:
237 case MAGIC_TYPE_BEDATE:
238 case MAGIC_TYPE_BELDATE:
239 case MAGIC_TYPE_UBEDATE:
240 case MAGIC_TYPE_UBELDATE:
241 case MAGIC_TYPE_QDATE:
242 case MAGIC_TYPE_QLDATE:
243 case MAGIC_TYPE_UQDATE:
244 case MAGIC_TYPE_UQLDATE:
245 case MAGIC_TYPE_BEQDATE:
246 case MAGIC_TYPE_BEQLDATE:
247 case MAGIC_TYPE_UBEQDATE:
248 case MAGIC_TYPE_UBEQLDATE:
249 case MAGIC_TYPE_LEQDATE:
250 case MAGIC_TYPE_LEQLDATE:
251 case MAGIC_TYPE_ULEQDATE:
252 case MAGIC_TYPE_ULEQLDATE:
253 case MAGIC_TYPE_LEDATE:
254 case MAGIC_TYPE_LELDATE:
255 case MAGIC_TYPE_ULEDATE:
256 case MAGIC_TYPE_ULELDATE:
257 case MAGIC_TYPE_MEDATE:
258 case MAGIC_TYPE_MELDATE:
259 case MAGIC_TYPE_STRING:
260 case MAGIC_TYPE_PSTRING:
261 case MAGIC_TYPE_REGEX:
262 case MAGIC_TYPE_SEARCH:
263 case MAGIC_TYPE_DEFAULT:
264 case MAGIC_TYPE_CLEAR:
265 re = &ml->root->format_string;
266 break;
267 }
268 }
269
270 pmatch.rm_so = 0;
271 pmatch.rm_eo = fmtlen;
272 if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273 magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274 (int)fmtlen, fmt);
275 return (-1);
276 }
277
278 return (0);
279 }
280
281 static u_int
magic_get_strength(struct magic_line * ml)282 magic_get_strength(struct magic_line *ml)
283 {
284 int n;
285 size_t size;
286
287 if (ml->type == MAGIC_TYPE_NONE)
288 return (0);
289
290 if (ml->test_not || ml->test_operator == 'x') {
291 n = 1;
292 goto skip;
293 }
294
295 n = 2 * MAGIC_STRENGTH_MULTIPLIER;
296 switch (ml->type) {
297 case MAGIC_TYPE_NONE:
298 case MAGIC_TYPE_DEFAULT:
299 return (0);
300 case MAGIC_TYPE_CLEAR:
301 case MAGIC_TYPE_NAME:
302 case MAGIC_TYPE_USE:
303 break;
304 case MAGIC_TYPE_BYTE:
305 case MAGIC_TYPE_UBYTE:
306 n += 1 * MAGIC_STRENGTH_MULTIPLIER;
307 break;
308 case MAGIC_TYPE_SHORT:
309 case MAGIC_TYPE_USHORT:
310 case MAGIC_TYPE_BESHORT:
311 case MAGIC_TYPE_UBESHORT:
312 case MAGIC_TYPE_LESHORT:
313 case MAGIC_TYPE_ULESHORT:
314 n += 2 * MAGIC_STRENGTH_MULTIPLIER;
315 break;
316 case MAGIC_TYPE_LONG:
317 case MAGIC_TYPE_ULONG:
318 case MAGIC_TYPE_FLOAT:
319 case MAGIC_TYPE_DATE:
320 case MAGIC_TYPE_LDATE:
321 case MAGIC_TYPE_UDATE:
322 case MAGIC_TYPE_ULDATE:
323 case MAGIC_TYPE_BELONG:
324 case MAGIC_TYPE_UBELONG:
325 case MAGIC_TYPE_BEFLOAT:
326 case MAGIC_TYPE_BEDATE:
327 case MAGIC_TYPE_BELDATE:
328 case MAGIC_TYPE_UBEDATE:
329 case MAGIC_TYPE_UBELDATE:
330 n += 4 * MAGIC_STRENGTH_MULTIPLIER;
331 break;
332 case MAGIC_TYPE_QUAD:
333 case MAGIC_TYPE_UQUAD:
334 case MAGIC_TYPE_DOUBLE:
335 case MAGIC_TYPE_QDATE:
336 case MAGIC_TYPE_QLDATE:
337 case MAGIC_TYPE_UQDATE:
338 case MAGIC_TYPE_UQLDATE:
339 case MAGIC_TYPE_BEQUAD:
340 case MAGIC_TYPE_UBEQUAD:
341 case MAGIC_TYPE_BEDOUBLE:
342 case MAGIC_TYPE_BEQDATE:
343 case MAGIC_TYPE_BEQLDATE:
344 case MAGIC_TYPE_UBEQDATE:
345 case MAGIC_TYPE_UBEQLDATE:
346 case MAGIC_TYPE_LEQUAD:
347 case MAGIC_TYPE_ULEQUAD:
348 case MAGIC_TYPE_LEDOUBLE:
349 case MAGIC_TYPE_LEQDATE:
350 case MAGIC_TYPE_LEQLDATE:
351 case MAGIC_TYPE_ULEQDATE:
352 case MAGIC_TYPE_ULEQLDATE:
353 case MAGIC_TYPE_LELONG:
354 case MAGIC_TYPE_ULELONG:
355 case MAGIC_TYPE_LEFLOAT:
356 case MAGIC_TYPE_LEDATE:
357 case MAGIC_TYPE_LELDATE:
358 case MAGIC_TYPE_ULEDATE:
359 case MAGIC_TYPE_ULELDATE:
360 case MAGIC_TYPE_MELONG:
361 case MAGIC_TYPE_MEDATE:
362 case MAGIC_TYPE_MELDATE:
363 n += 8 * MAGIC_STRENGTH_MULTIPLIER;
364 break;
365 case MAGIC_TYPE_STRING:
366 case MAGIC_TYPE_PSTRING:
367 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
368 break;
369 case MAGIC_TYPE_BESTRING16:
370 case MAGIC_TYPE_LESTRING16:
371 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
372 break;
373 case MAGIC_TYPE_REGEX:
374 case MAGIC_TYPE_SEARCH:
375 size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
376 if (size < 1)
377 size = 1;
378 n += ml->test_string_size * size;
379 break;
380 }
381 switch (ml->test_operator) {
382 case '=':
383 n += MAGIC_STRENGTH_MULTIPLIER;
384 break;
385 case '<':
386 case '>':
387 case '[':
388 case ']':
389 n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
390 break;
391 case '^':
392 case '&':
393 n -= MAGIC_STRENGTH_MULTIPLIER;
394 break;
395 }
396
397 skip:
398 switch (ml->strength_operator) {
399 case '+':
400 n += ml->strength_value;
401 break;
402 case '-':
403 n -= ml->strength_value;
404 break;
405 case '*':
406 n *= ml->strength_value;
407 break;
408 case '/':
409 n /= ml->strength_value;
410 break;
411 }
412 return (n <= 0 ? 1 : n);
413 }
414
415 static int
magic_get_string(char ** line,char * out,size_t * outlen)416 magic_get_string(char **line, char *out, size_t *outlen)
417 {
418 char *start, *cp, c;
419 int d0, d1, d2;
420
421 start = out;
422 for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
423 if (*cp != '\\') {
424 *out++ = *cp;
425 continue;
426 }
427
428 switch (c = *++cp) {
429 case '\0': /* end of line */
430 return (-1);
431 case ' ':
432 *out++ = ' ';
433 break;
434 case '0':
435 case '1':
436 case '2':
437 case '3':
438 case '4':
439 case '5':
440 case '6':
441 case '7':
442 d0 = magic_odigit(cp[0]);
443 if (cp[0] != '\0')
444 d1 = magic_odigit(cp[1]);
445 else
446 d1 = -1;
447 if (cp[0] != '\0' && cp[1] != '\0')
448 d2 = magic_odigit(cp[2]);
449 else
450 d2 = -1;
451
452 if (d0 != -1 && d1 != -1 && d2 != -1) {
453 *out = d2 | (d1 << 3) | (d0 << 6);
454 cp += 2;
455 } else if (d0 != -1 && d1 != -1) {
456 *out = d1 | (d0 << 3);
457 cp++;
458 } else if (d0 != -1)
459 *out = d0;
460 else
461 return (-1);
462 out++;
463 break;
464 case 'x':
465 d0 = magic_xdigit(cp[1]);
466 if (cp[1] != '\0')
467 d1 = magic_xdigit(cp[2]);
468 else
469 d1 = -1;
470
471 if (d0 != -1 && d1 != -1) {
472 *out = d1 | (d0 << 4);
473 cp += 2;
474 } else if (d0 != -1) {
475 *out = d0;
476 cp++;
477 } else
478 return (-1);
479 out++;
480
481 break;
482 case 'a':
483 *out++ = '\a';
484 break;
485 case 'b':
486 *out++ = '\b';
487 break;
488 case 't':
489 *out++ = '\t';
490 break;
491 case 'f':
492 *out++ = '\f';
493 break;
494 case 'n':
495 *out++ = '\n';
496 break;
497 case 'r':
498 *out++ = '\r';
499 break;
500 case '\\':
501 *out++ = '\\';
502 break;
503 case '\'':
504 *out++ = '\'';
505 break;
506 case '\"':
507 *out++ = '\"';
508 break;
509 default:
510 *out++ = c;
511 break;
512 }
513 }
514 *out = '\0';
515 *outlen = out - start;
516
517 *line = cp;
518 return (0);
519 }
520
521 static int
magic_parse_offset(struct magic_line * ml,char ** line)522 magic_parse_offset(struct magic_line *ml, char **line)
523 {
524 char *copy, *s, *cp, *endptr;
525
526 while (isspace((u_char)**line))
527 (*line)++;
528 copy = s = cp = xmalloc(strlen(*line) + 1);
529 while (**line != '\0' && !isspace((u_char)**line))
530 *cp++ = *(*line)++;
531 *cp = '\0';
532
533 ml->offset = 0;
534 ml->offset_relative = 0;
535
536 ml->indirect_type = ' ';
537 ml->indirect_relative = 0;
538 ml->indirect_offset = 0;
539 ml->indirect_operator = ' ';
540 ml->indirect_operand = 0;
541
542 if (*s == '&') {
543 ml->offset_relative = 1;
544 s++;
545 }
546
547 if (*s != '(') {
548 endptr = magic_strtoll(s, &ml->offset);
549 if (endptr == NULL || *endptr != '\0') {
550 magic_warn(ml, "missing closing bracket");
551 goto fail;
552 }
553 if (ml->offset < 0 && !ml->offset_relative) {
554 magic_warn(ml, "negative absolute offset");
555 goto fail;
556 }
557 goto done;
558 }
559 s++;
560
561 if (*s == '&') {
562 ml->indirect_relative = 1;
563 s++;
564 }
565
566 endptr = magic_strtoll(s, &ml->indirect_offset);
567 if (endptr == NULL) {
568 magic_warn(ml, "can't parse offset: %s", s);
569 goto fail;
570 }
571 s = endptr;
572 if (*s == ')')
573 goto done;
574
575 if (*s == '.') {
576 s++;
577 if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
578 magic_warn(ml, "unknown offset type: %c", *s);
579 goto fail;
580 }
581 ml->indirect_type = *s;
582 s++;
583 if (*s == ')')
584 goto done;
585 }
586
587 if (*s == '\0' || strchr("+-*", *s) == NULL) {
588 magic_warn(ml, "unknown offset operator: %c", *s);
589 goto fail;
590 }
591 ml->indirect_operator = *s;
592 s++;
593 if (*s == ')')
594 goto done;
595
596 if (*s == '(') {
597 s++;
598 endptr = magic_strtoll(s, &ml->indirect_operand);
599 if (endptr == NULL || *endptr != ')') {
600 magic_warn(ml, "missing closing bracket");
601 goto fail;
602 }
603 if (*++endptr != ')') {
604 magic_warn(ml, "missing closing bracket");
605 goto fail;
606 }
607 } else {
608 endptr = magic_strtoll(s, &ml->indirect_operand);
609 if (endptr == NULL || *endptr != ')') {
610 magic_warn(ml, "missing closing bracket");
611 goto fail;
612 }
613 }
614
615 done:
616 free(copy);
617 return (0);
618
619 fail:
620 free(copy);
621 return (-1);
622 }
623
624 static int
magic_parse_type(struct magic_line * ml,char ** line)625 magic_parse_type(struct magic_line *ml, char **line)
626 {
627 char *copy, *s, *cp, *endptr;
628
629 while (isspace((u_char)**line))
630 (*line)++;
631 copy = s = cp = xmalloc(strlen(*line) + 1);
632 while (**line != '\0' && !isspace((u_char)**line))
633 *cp++ = *(*line)++;
634 *cp = '\0';
635
636 ml->type = MAGIC_TYPE_NONE;
637 ml->type_operator = ' ';
638 ml->type_operand = 0;
639
640 if (strcmp(s, "name") == 0) {
641 ml->type = MAGIC_TYPE_NAME;
642 ml->type_string = xstrdup(s);
643 goto done;
644 }
645 if (strcmp(s, "use") == 0) {
646 ml->type = MAGIC_TYPE_USE;
647 ml->type_string = xstrdup(s);
648 goto done;
649 }
650
651 if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
652 strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
653 if (*s == 'u')
654 ml->type_string = xstrdup(s + 1);
655 else
656 ml->type_string = xstrdup(s);
657 ml->type = MAGIC_TYPE_STRING;
658 magic_mark_text(ml, 0);
659 goto done;
660 }
661 if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
662 strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
663 if (*s == 'u')
664 ml->type_string = xstrdup(s + 1);
665 else
666 ml->type_string = xstrdup(s);
667 ml->type = MAGIC_TYPE_PSTRING;
668 magic_mark_text(ml, 0);
669 goto done;
670 }
671 if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
672 strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
673 if (*s == 'u')
674 ml->type_string = xstrdup(s + 1);
675 else
676 ml->type_string = xstrdup(s);
677 ml->type = MAGIC_TYPE_SEARCH;
678 goto done;
679 }
680 if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
681 strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
682 if (*s == 'u')
683 ml->type_string = xstrdup(s + 1);
684 else
685 ml->type_string = xstrdup(s);
686 ml->type = MAGIC_TYPE_REGEX;
687 goto done;
688 }
689 ml->type_string = xstrdup(s);
690
691 cp = &s[strcspn(s, "+-&/%*")];
692 if (*cp != '\0') {
693 ml->type_operator = *cp;
694 endptr = magic_strtoull(cp + 1, &ml->type_operand);
695 if (endptr == NULL || *endptr != '\0') {
696 magic_warn(ml, "can't parse operand: %s", cp + 1);
697 goto fail;
698 }
699 *cp = '\0';
700 }
701
702 if (strcmp(s, "byte") == 0)
703 ml->type = MAGIC_TYPE_BYTE;
704 else if (strcmp(s, "short") == 0)
705 ml->type = MAGIC_TYPE_SHORT;
706 else if (strcmp(s, "long") == 0)
707 ml->type = MAGIC_TYPE_LONG;
708 else if (strcmp(s, "quad") == 0)
709 ml->type = MAGIC_TYPE_QUAD;
710 else if (strcmp(s, "ubyte") == 0)
711 ml->type = MAGIC_TYPE_UBYTE;
712 else if (strcmp(s, "ushort") == 0)
713 ml->type = MAGIC_TYPE_USHORT;
714 else if (strcmp(s, "ulong") == 0)
715 ml->type = MAGIC_TYPE_ULONG;
716 else if (strcmp(s, "uquad") == 0)
717 ml->type = MAGIC_TYPE_UQUAD;
718 else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
719 ml->type = MAGIC_TYPE_FLOAT;
720 else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
721 ml->type = MAGIC_TYPE_DOUBLE;
722 else if (strcmp(s, "date") == 0)
723 ml->type = MAGIC_TYPE_DATE;
724 else if (strcmp(s, "qdate") == 0)
725 ml->type = MAGIC_TYPE_QDATE;
726 else if (strcmp(s, "ldate") == 0)
727 ml->type = MAGIC_TYPE_LDATE;
728 else if (strcmp(s, "qldate") == 0)
729 ml->type = MAGIC_TYPE_QLDATE;
730 else if (strcmp(s, "udate") == 0)
731 ml->type = MAGIC_TYPE_UDATE;
732 else if (strcmp(s, "uqdate") == 0)
733 ml->type = MAGIC_TYPE_UQDATE;
734 else if (strcmp(s, "uldate") == 0)
735 ml->type = MAGIC_TYPE_ULDATE;
736 else if (strcmp(s, "uqldate") == 0)
737 ml->type = MAGIC_TYPE_UQLDATE;
738 else if (strcmp(s, "beshort") == 0)
739 ml->type = MAGIC_TYPE_BESHORT;
740 else if (strcmp(s, "belong") == 0)
741 ml->type = MAGIC_TYPE_BELONG;
742 else if (strcmp(s, "bequad") == 0)
743 ml->type = MAGIC_TYPE_BEQUAD;
744 else if (strcmp(s, "ubeshort") == 0)
745 ml->type = MAGIC_TYPE_UBESHORT;
746 else if (strcmp(s, "ubelong") == 0)
747 ml->type = MAGIC_TYPE_UBELONG;
748 else if (strcmp(s, "ubequad") == 0)
749 ml->type = MAGIC_TYPE_UBEQUAD;
750 else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
751 ml->type = MAGIC_TYPE_BEFLOAT;
752 else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
753 ml->type = MAGIC_TYPE_BEDOUBLE;
754 else if (strcmp(s, "bedate") == 0)
755 ml->type = MAGIC_TYPE_BEDATE;
756 else if (strcmp(s, "beqdate") == 0)
757 ml->type = MAGIC_TYPE_BEQDATE;
758 else if (strcmp(s, "beldate") == 0)
759 ml->type = MAGIC_TYPE_BELDATE;
760 else if (strcmp(s, "beqldate") == 0)
761 ml->type = MAGIC_TYPE_BEQLDATE;
762 else if (strcmp(s, "ubedate") == 0)
763 ml->type = MAGIC_TYPE_UBEDATE;
764 else if (strcmp(s, "ubeqdate") == 0)
765 ml->type = MAGIC_TYPE_UBEQDATE;
766 else if (strcmp(s, "ubeldate") == 0)
767 ml->type = MAGIC_TYPE_UBELDATE;
768 else if (strcmp(s, "ubeqldate") == 0)
769 ml->type = MAGIC_TYPE_UBEQLDATE;
770 else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
771 ml->type = MAGIC_TYPE_BESTRING16;
772 else if (strcmp(s, "leshort") == 0)
773 ml->type = MAGIC_TYPE_LESHORT;
774 else if (strcmp(s, "lelong") == 0)
775 ml->type = MAGIC_TYPE_LELONG;
776 else if (strcmp(s, "lequad") == 0)
777 ml->type = MAGIC_TYPE_LEQUAD;
778 else if (strcmp(s, "uleshort") == 0)
779 ml->type = MAGIC_TYPE_ULESHORT;
780 else if (strcmp(s, "ulelong") == 0)
781 ml->type = MAGIC_TYPE_ULELONG;
782 else if (strcmp(s, "ulequad") == 0)
783 ml->type = MAGIC_TYPE_ULEQUAD;
784 else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
785 ml->type = MAGIC_TYPE_LEFLOAT;
786 else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
787 ml->type = MAGIC_TYPE_LEDOUBLE;
788 else if (strcmp(s, "ledate") == 0)
789 ml->type = MAGIC_TYPE_LEDATE;
790 else if (strcmp(s, "leqdate") == 0)
791 ml->type = MAGIC_TYPE_LEQDATE;
792 else if (strcmp(s, "leldate") == 0)
793 ml->type = MAGIC_TYPE_LELDATE;
794 else if (strcmp(s, "leqldate") == 0)
795 ml->type = MAGIC_TYPE_LEQLDATE;
796 else if (strcmp(s, "uledate") == 0)
797 ml->type = MAGIC_TYPE_ULEDATE;
798 else if (strcmp(s, "uleqdate") == 0)
799 ml->type = MAGIC_TYPE_ULEQDATE;
800 else if (strcmp(s, "uleldate") == 0)
801 ml->type = MAGIC_TYPE_ULELDATE;
802 else if (strcmp(s, "uleqldate") == 0)
803 ml->type = MAGIC_TYPE_ULEQLDATE;
804 else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
805 ml->type = MAGIC_TYPE_LESTRING16;
806 else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
807 ml->type = MAGIC_TYPE_MELONG;
808 else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
809 ml->type = MAGIC_TYPE_MEDATE;
810 else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
811 ml->type = MAGIC_TYPE_MELDATE;
812 else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
813 ml->type = MAGIC_TYPE_DEFAULT;
814 else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
815 ml->type = MAGIC_TYPE_CLEAR;
816 else {
817 magic_warn(ml, "unknown type: %s", s);
818 goto fail;
819 }
820 magic_mark_text(ml, 0);
821
822 done:
823 free(copy);
824 return (0);
825
826 fail:
827 free(copy);
828 return (-1);
829 }
830
831 static int
magic_parse_value(struct magic_line * ml,char ** line)832 magic_parse_value(struct magic_line *ml, char **line)
833 {
834 char *copy, *s, *cp, *endptr;
835 size_t slen;
836 uint64_t u;
837
838 while (isspace((u_char)**line))
839 (*line)++;
840
841 ml->test_operator = '=';
842 ml->test_not = 0;
843 ml->test_string = NULL;
844 ml->test_string_size = 0;
845 ml->test_unsigned = 0;
846 ml->test_signed = 0;
847
848 if (**line == '\0')
849 return (0);
850
851 s = *line;
852 if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
853 (*line)++;
854 ml->test_operator = 'x';
855 return (0);
856 }
857
858 if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
859 magic_warn(ml, "test specified for default or clear");
860 ml->test_operator = 'x';
861 return (0);
862 }
863
864 if (**line == '!') {
865 ml->test_not = 1;
866 (*line)++;
867 }
868
869 switch (ml->type) {
870 case MAGIC_TYPE_NAME:
871 case MAGIC_TYPE_USE:
872 copy = s = xmalloc(strlen(*line) + 1);
873 if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
874 magic_warn(ml, "can't parse string");
875 goto fail;
876 }
877 if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
878 magic_warn(ml, "invalid name");
879 goto fail;
880 }
881 ml->name = s;
882 return (0); /* do not free */
883 case MAGIC_TYPE_STRING:
884 case MAGIC_TYPE_PSTRING:
885 case MAGIC_TYPE_SEARCH:
886 if (**line == '>' || **line == '<' || **line == '=') {
887 ml->test_operator = **line;
888 (*line)++;
889 }
890 /* FALLTHROUGH */
891 case MAGIC_TYPE_REGEX:
892 if (**line == '=')
893 (*line)++;
894 copy = s = xmalloc(strlen(*line) + 1);
895 if (magic_get_string(line, s, &slen) != 0) {
896 magic_warn(ml, "can't parse string");
897 goto fail;
898 }
899 ml->test_string_size = slen;
900 ml->test_string = s;
901 return (0); /* do not free */
902 default:
903 break;
904 }
905
906 while (isspace((u_char)**line))
907 (*line)++;
908 if ((*line)[0] == '<' && (*line)[1] == '=') {
909 ml->test_operator = '[';
910 (*line) += 2;
911 } else if ((*line)[0] == '>' && (*line)[1] == '=') {
912 ml->test_operator = ']';
913 (*line) += 2;
914 } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
915 ml->test_operator = **line;
916 (*line)++;
917 }
918
919 while (isspace((u_char)**line))
920 (*line)++;
921 copy = cp = xmalloc(strlen(*line) + 1);
922 while (**line != '\0' && !isspace((u_char)**line))
923 *cp++ = *(*line)++;
924 *cp = '\0';
925
926 switch (ml->type) {
927 case MAGIC_TYPE_FLOAT:
928 case MAGIC_TYPE_DOUBLE:
929 case MAGIC_TYPE_BEFLOAT:
930 case MAGIC_TYPE_BEDOUBLE:
931 case MAGIC_TYPE_LEFLOAT:
932 case MAGIC_TYPE_LEDOUBLE:
933 errno = 0;
934 ml->test_double = strtod(copy, &endptr);
935 if (errno == ERANGE)
936 endptr = NULL;
937 break;
938 default:
939 if (*ml->type_string == 'u')
940 endptr = magic_strtoull(copy, &ml->test_unsigned);
941 else {
942 endptr = magic_strtoll(copy, &ml->test_signed);
943 if (endptr == NULL || *endptr != '\0') {
944 /*
945 * If we can't parse this as a signed number,
946 * try as unsigned instead.
947 */
948 endptr = magic_strtoull(copy, &u);
949 if (endptr != NULL && *endptr == '\0')
950 ml->test_signed = (int64_t)u;
951 }
952 }
953 break;
954 }
955 if (endptr == NULL || *endptr != '\0') {
956 magic_warn(ml, "can't parse number: %s", copy);
957 goto fail;
958 }
959
960 free(copy);
961 return (0);
962
963 fail:
964 free(copy);
965 return (-1);
966 }
967
968 int
magic_compare(struct magic_line * ml1,struct magic_line * ml2)969 magic_compare(struct magic_line *ml1, struct magic_line *ml2)
970 {
971 if (ml1->strength < ml2->strength)
972 return (1);
973 if (ml1->strength > ml2->strength)
974 return (-1);
975
976 /*
977 * The original file depends on the (undefined!) qsort(3) behaviour
978 * when the strength is equal. This is impossible to reproduce with an
979 * RB tree so just use the line number and hope for the best.
980 */
981 if (ml1->line < ml2->line)
982 return (-1);
983 if (ml1->line > ml2->line)
984 return (1);
985
986 return (0);
987 }
988 RB_GENERATE(magic_tree, magic_line, node, magic_compare);
989
990 int
magic_named_compare(struct magic_line * ml1,struct magic_line * ml2)991 magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
992 {
993 return (strcmp(ml1->name, ml2->name));
994 }
995 RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
996
997 static void
magic_adjust_strength(struct magic * m,u_int at,struct magic_line * ml,char * line)998 magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
999 char *line)
1000 {
1001 char *cp, *s;
1002 int64_t value;
1003
1004 cp = line + (sizeof "!:strength") - 1;
1005 while (isspace((u_char)*cp))
1006 cp++;
1007 s = cp;
1008
1009 cp = strchr(s, '#');
1010 if (cp != NULL)
1011 *cp = '\0';
1012 cp = s;
1013
1014 if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1015 magic_warnm(m, at, "invalid strength operator: %s", s);
1016 return;
1017 }
1018 ml->strength_operator = *cp++;
1019
1020 while (isspace((u_char)*cp))
1021 cp++;
1022 cp = magic_strtoll(cp, &value);
1023 while (cp != NULL && isspace((u_char)*cp))
1024 cp++;
1025 if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1026 magic_warnm(m, at, "invalid strength value: %s", s);
1027 return;
1028 }
1029 ml->strength_value = value;
1030 }
1031
1032 static void
magic_set_mimetype(struct magic * m,u_int at,struct magic_line * ml,char * line)1033 magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1034 {
1035 char *mimetype, *cp;
1036
1037 mimetype = line + (sizeof "!:mime") - 1;
1038 while (isspace((u_char)*mimetype))
1039 mimetype++;
1040
1041 cp = strchr(mimetype, '#');
1042 if (cp != NULL)
1043 *cp = '\0';
1044
1045 if (*mimetype != '\0') {
1046 cp = mimetype + strlen(mimetype) - 1;
1047 while (cp != mimetype && isspace((u_char)*cp))
1048 *cp-- = '\0';
1049 }
1050
1051 cp = mimetype;
1052 while (*cp != '\0') {
1053 if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1054 break;
1055 cp++;
1056 }
1057 if (*mimetype == '\0' || *cp != '\0') {
1058 magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1059 return;
1060 }
1061 if (ml == NULL) {
1062 magic_warnm(m, at, "stray MIME type: %s", mimetype);
1063 return;
1064 }
1065 ml->mimetype = xstrdup(mimetype);
1066 }
1067
1068 struct magic *
magic_load(FILE * f,const char * path,int warnings)1069 magic_load(FILE *f, const char *path, int warnings)
1070 {
1071 struct magic *m;
1072 struct magic_line *ml = NULL, *parent, *parent0;
1073 char *line, *tmp;
1074 size_t size;
1075 ssize_t slen;
1076 u_int at, level, n, i;
1077
1078 m = xcalloc(1, sizeof *m);
1079 m->path = xstrdup(path);
1080 m->warnings = warnings;
1081 RB_INIT(&m->tree);
1082
1083 parent = NULL;
1084 parent0 = NULL;
1085 level = 0;
1086
1087 at = 0;
1088 tmp = NULL;
1089 size = 0;
1090 while ((slen = getline(&tmp, &size, f)) != -1) {
1091 line = tmp;
1092 if (line[slen - 1] == '\n')
1093 line[slen - 1] = '\0';
1094
1095 at++;
1096
1097 while (isspace((u_char)*line))
1098 line++;
1099 if (*line == '\0' || *line == '#')
1100 continue;
1101
1102 if (strncmp (line, "!:mime", 6) == 0) {
1103 magic_set_mimetype(m, at, ml, line);
1104 continue;
1105 }
1106 if (strncmp (line, "!:strength", 10) == 0) {
1107 magic_adjust_strength(m, at, ml, line);
1108 continue;
1109 }
1110 if (strncmp (line, "!:", 2) == 0) {
1111 for (i = 0; i < 64 && line[i] != '\0'; i++) {
1112 if (isspace((u_char)line[i]))
1113 break;
1114 }
1115 magic_warnm(m, at, "%.*s not supported", i, line);
1116 continue;
1117 }
1118
1119 n = 0;
1120 for (; *line == '>'; line++)
1121 n++;
1122
1123 ml = xcalloc(1, sizeof *ml);
1124 ml->root = m;
1125 ml->line = at;
1126 ml->type = MAGIC_TYPE_NONE;
1127 TAILQ_INIT(&ml->children);
1128 ml->text = 1;
1129
1130 /*
1131 * At this point n is the level we want, level is the current
1132 * level. parent0 is the last line at the same level and parent
1133 * is the last line at the previous level.
1134 */
1135 if (n == level + 1) {
1136 parent = parent0;
1137 } else if (n < level) {
1138 for (i = n; i < level && parent != NULL; i++)
1139 parent = parent->parent;
1140 } else if (n != level) {
1141 magic_warn(ml, "level skipped (%u->%u)", level, n);
1142 free(ml);
1143 continue;
1144 }
1145 ml->parent = parent;
1146 level = n;
1147
1148 if (magic_parse_offset(ml, &line) != 0 ||
1149 magic_parse_type(ml, &line) != 0 ||
1150 magic_parse_value(ml, &line) != 0 ||
1151 magic_set_result(ml, line) != 0) {
1152 /*
1153 * An invalid line still needs to appear in the tree in
1154 * case it has any children.
1155 */
1156 ml->type = MAGIC_TYPE_NONE;
1157 }
1158
1159 ml->strength = magic_get_strength(ml);
1160 if (ml->parent == NULL) {
1161 if (ml->name != NULL)
1162 RB_INSERT(magic_named_tree, &m->named, ml);
1163 else
1164 RB_INSERT(magic_tree, &m->tree, ml);
1165 } else
1166 TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1167 parent0 = ml;
1168 }
1169 free(tmp);
1170 if (ferror(f))
1171 err(1, "%s", path);
1172
1173 return (m);
1174 }
1175