xref: /openbsd/usr.bin/file/magic-load.c (revision 3cab2bb3)
1 /* $OpenBSD: magic-load.c,v 1.26 2017/07/02 10:58:15 brynet Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "magic.h"
32 #include "xmalloc.h"
33 
34 static int
35 magic_odigit(u_char c)
36 {
37 	if (c >= '0' && c <= '7')
38 		return (c - '0');
39 	return (-1);
40 }
41 
42 static int
43 magic_xdigit(u_char c)
44 {
45 	if (c >= '0' && c <= '9')
46 		return (c - '0');
47 	if (c >= 'a' && c <= 'f')
48 		return (10 + c - 'a');
49 	if (c >= 'A' && c <= 'F')
50 		return (10 + c - 'A');
51 	return (-1);
52 }
53 
54 static void
55 magic_mark_text(struct magic_line *ml, int text)
56 {
57 	do {
58 		ml->text = text;
59 		ml = ml->parent;
60 	} while (ml != NULL);
61 }
62 
63 static int
64 magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
65     const char *p)
66 {
67 	int	error;
68 	char	errbuf[256];
69 
70 	error = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
71 	if (error != 0) {
72 		regerror(error, re, errbuf, sizeof errbuf);
73 		magic_warn(ml, "bad %s pattern: %s", name, errbuf);
74 		return (-1);
75 	}
76 	return (0);
77 }
78 
79 static int
80 magic_set_result(struct magic_line *ml, const char *s)
81 {
82 	const char	*fmt, *endfmt, *cp;
83 	regex_t		*re = NULL;
84 	regmatch_t	 pmatch;
85 	size_t		 fmtlen;
86 
87 	while (isspace((u_char)*s))
88 		s++;
89 	if (*s == '\0') {
90 		ml->result = NULL;
91 		return (0);
92 	}
93 	ml->result = xstrdup(s);
94 
95 	fmt = NULL;
96 	for (cp = s; *cp != '\0'; cp++) {
97 		if (cp[0] == '%' && cp[1] != '%') {
98 			if (fmt != NULL) {
99 				magic_warn(ml, "multiple formats");
100 				return (-1);
101 			}
102 			fmt = cp;
103 		}
104 	}
105 	if (fmt == NULL)
106 		return (0);
107 	fmt++;
108 
109 	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
110 		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
111 			break;
112 	}
113 	if (*endfmt == '\0') {
114 		magic_warn(ml, "unterminated format");
115 		return (-1);
116 	}
117 	fmtlen = endfmt + 1 - fmt;
118 	if (fmtlen > 32) {
119 		magic_warn(ml, "format too long");
120 		return (-1);
121 	}
122 
123 	if (*endfmt == 's') {
124 		switch (ml->type) {
125 		case MAGIC_TYPE_DATE:
126 		case MAGIC_TYPE_LDATE:
127 		case MAGIC_TYPE_UDATE:
128 		case MAGIC_TYPE_ULDATE:
129 		case MAGIC_TYPE_BEDATE:
130 		case MAGIC_TYPE_BELDATE:
131 		case MAGIC_TYPE_UBEDATE:
132 		case MAGIC_TYPE_UBELDATE:
133 		case MAGIC_TYPE_QDATE:
134 		case MAGIC_TYPE_QLDATE:
135 		case MAGIC_TYPE_UQDATE:
136 		case MAGIC_TYPE_UQLDATE:
137 		case MAGIC_TYPE_BEQDATE:
138 		case MAGIC_TYPE_BEQLDATE:
139 		case MAGIC_TYPE_UBEQDATE:
140 		case MAGIC_TYPE_UBEQLDATE:
141 		case MAGIC_TYPE_LEQDATE:
142 		case MAGIC_TYPE_LEQLDATE:
143 		case MAGIC_TYPE_ULEQDATE:
144 		case MAGIC_TYPE_ULEQLDATE:
145 		case MAGIC_TYPE_LEDATE:
146 		case MAGIC_TYPE_LELDATE:
147 		case MAGIC_TYPE_ULEDATE:
148 		case MAGIC_TYPE_ULELDATE:
149 		case MAGIC_TYPE_MEDATE:
150 		case MAGIC_TYPE_MELDATE:
151 		case MAGIC_TYPE_STRING:
152 		case MAGIC_TYPE_PSTRING:
153 		case MAGIC_TYPE_BESTRING16:
154 		case MAGIC_TYPE_LESTRING16:
155 		case MAGIC_TYPE_REGEX:
156 		case MAGIC_TYPE_SEARCH:
157 			break;
158 		default:
159 			ml->stringify = 1;
160 			break;
161 		}
162 	}
163 
164 	if (!ml->root->compiled) {
165 		/*
166 		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
167 		 * with byte, short, long. We get lucky because our first and
168 		 * only argument ends up in a register. Accept it for now.
169 		 */
170 		if (magic_make_pattern(ml, "short", &ml->root->format_short,
171 		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
172 			return (-1);
173 		if (magic_make_pattern(ml, "long", &ml->root->format_long,
174 		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
175 			return (-1);
176 		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
177 		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
178 			return (-1);
179 		if (magic_make_pattern(ml, "float", &ml->root->format_float,
180 		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
181 			return (-1);
182 		if (magic_make_pattern(ml, "string", &ml->root->format_string,
183 		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
184 			return (-1);
185 		ml->root->compiled = 1;
186 	}
187 
188 	if (ml->stringify)
189 		re = &ml->root->format_string;
190 	else {
191 		switch (ml->type) {
192 		case MAGIC_TYPE_NONE:
193 		case MAGIC_TYPE_BESTRING16:
194 		case MAGIC_TYPE_LESTRING16:
195 		case MAGIC_TYPE_NAME:
196 		case MAGIC_TYPE_USE:
197 			return (0); /* don't use result */
198 		case MAGIC_TYPE_BYTE:
199 		case MAGIC_TYPE_UBYTE:
200 		case MAGIC_TYPE_SHORT:
201 		case MAGIC_TYPE_USHORT:
202 		case MAGIC_TYPE_BESHORT:
203 		case MAGIC_TYPE_UBESHORT:
204 		case MAGIC_TYPE_LESHORT:
205 		case MAGIC_TYPE_ULESHORT:
206 			re = &ml->root->format_short;
207 			break;
208 		case MAGIC_TYPE_LONG:
209 		case MAGIC_TYPE_ULONG:
210 		case MAGIC_TYPE_BELONG:
211 		case MAGIC_TYPE_UBELONG:
212 		case MAGIC_TYPE_LELONG:
213 		case MAGIC_TYPE_ULELONG:
214 		case MAGIC_TYPE_MELONG:
215 			re = &ml->root->format_long;
216 			break;
217 		case MAGIC_TYPE_QUAD:
218 		case MAGIC_TYPE_UQUAD:
219 		case MAGIC_TYPE_BEQUAD:
220 		case MAGIC_TYPE_UBEQUAD:
221 		case MAGIC_TYPE_LEQUAD:
222 		case MAGIC_TYPE_ULEQUAD:
223 			re = &ml->root->format_quad;
224 			break;
225 		case MAGIC_TYPE_FLOAT:
226 		case MAGIC_TYPE_BEFLOAT:
227 		case MAGIC_TYPE_LEFLOAT:
228 		case MAGIC_TYPE_DOUBLE:
229 		case MAGIC_TYPE_BEDOUBLE:
230 		case MAGIC_TYPE_LEDOUBLE:
231 			re = &ml->root->format_float;
232 			break;
233 		case MAGIC_TYPE_DATE:
234 		case MAGIC_TYPE_LDATE:
235 		case MAGIC_TYPE_UDATE:
236 		case MAGIC_TYPE_ULDATE:
237 		case MAGIC_TYPE_BEDATE:
238 		case MAGIC_TYPE_BELDATE:
239 		case MAGIC_TYPE_UBEDATE:
240 		case MAGIC_TYPE_UBELDATE:
241 		case MAGIC_TYPE_QDATE:
242 		case MAGIC_TYPE_QLDATE:
243 		case MAGIC_TYPE_UQDATE:
244 		case MAGIC_TYPE_UQLDATE:
245 		case MAGIC_TYPE_BEQDATE:
246 		case MAGIC_TYPE_BEQLDATE:
247 		case MAGIC_TYPE_UBEQDATE:
248 		case MAGIC_TYPE_UBEQLDATE:
249 		case MAGIC_TYPE_LEQDATE:
250 		case MAGIC_TYPE_LEQLDATE:
251 		case MAGIC_TYPE_ULEQDATE:
252 		case MAGIC_TYPE_ULEQLDATE:
253 		case MAGIC_TYPE_LEDATE:
254 		case MAGIC_TYPE_LELDATE:
255 		case MAGIC_TYPE_ULEDATE:
256 		case MAGIC_TYPE_ULELDATE:
257 		case MAGIC_TYPE_MEDATE:
258 		case MAGIC_TYPE_MELDATE:
259 		case MAGIC_TYPE_STRING:
260 		case MAGIC_TYPE_PSTRING:
261 		case MAGIC_TYPE_REGEX:
262 		case MAGIC_TYPE_SEARCH:
263 		case MAGIC_TYPE_DEFAULT:
264 		case MAGIC_TYPE_CLEAR:
265 			re = &ml->root->format_string;
266 			break;
267 		}
268 	}
269 
270 	pmatch.rm_so = 0;
271 	pmatch.rm_eo = fmtlen;
272 	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273 		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274 		    (int)fmtlen, fmt);
275 		return (-1);
276 	}
277 
278 	return (0);
279 }
280 
281 static u_int
282 magic_get_strength(struct magic_line *ml)
283 {
284 	int	n;
285 	size_t	size;
286 
287 	if (ml->type == MAGIC_TYPE_NONE)
288 		return (0);
289 
290 	if (ml->test_not || ml->test_operator == 'x') {
291 		n = 1;
292 		goto skip;
293 	}
294 
295 	n = 2 * MAGIC_STRENGTH_MULTIPLIER;
296 	switch (ml->type) {
297 	case MAGIC_TYPE_NONE:
298 	case MAGIC_TYPE_DEFAULT:
299 		return (0);
300 	case MAGIC_TYPE_CLEAR:
301 	case MAGIC_TYPE_NAME:
302 	case MAGIC_TYPE_USE:
303 		break;
304 	case MAGIC_TYPE_BYTE:
305 	case MAGIC_TYPE_UBYTE:
306 		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
307 		break;
308 	case MAGIC_TYPE_SHORT:
309 	case MAGIC_TYPE_USHORT:
310 	case MAGIC_TYPE_BESHORT:
311 	case MAGIC_TYPE_UBESHORT:
312 	case MAGIC_TYPE_LESHORT:
313 	case MAGIC_TYPE_ULESHORT:
314 		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
315 		break;
316 	case MAGIC_TYPE_LONG:
317 	case MAGIC_TYPE_ULONG:
318 	case MAGIC_TYPE_FLOAT:
319 	case MAGIC_TYPE_DATE:
320 	case MAGIC_TYPE_LDATE:
321 	case MAGIC_TYPE_UDATE:
322 	case MAGIC_TYPE_ULDATE:
323 	case MAGIC_TYPE_BELONG:
324 	case MAGIC_TYPE_UBELONG:
325 	case MAGIC_TYPE_BEFLOAT:
326 	case MAGIC_TYPE_BEDATE:
327 	case MAGIC_TYPE_BELDATE:
328 	case MAGIC_TYPE_UBEDATE:
329 	case MAGIC_TYPE_UBELDATE:
330 		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
331 		break;
332 	case MAGIC_TYPE_QUAD:
333 	case MAGIC_TYPE_UQUAD:
334 	case MAGIC_TYPE_DOUBLE:
335 	case MAGIC_TYPE_QDATE:
336 	case MAGIC_TYPE_QLDATE:
337 	case MAGIC_TYPE_UQDATE:
338 	case MAGIC_TYPE_UQLDATE:
339 	case MAGIC_TYPE_BEQUAD:
340 	case MAGIC_TYPE_UBEQUAD:
341 	case MAGIC_TYPE_BEDOUBLE:
342 	case MAGIC_TYPE_BEQDATE:
343 	case MAGIC_TYPE_BEQLDATE:
344 	case MAGIC_TYPE_UBEQDATE:
345 	case MAGIC_TYPE_UBEQLDATE:
346 	case MAGIC_TYPE_LEQUAD:
347 	case MAGIC_TYPE_ULEQUAD:
348 	case MAGIC_TYPE_LEDOUBLE:
349 	case MAGIC_TYPE_LEQDATE:
350 	case MAGIC_TYPE_LEQLDATE:
351 	case MAGIC_TYPE_ULEQDATE:
352 	case MAGIC_TYPE_ULEQLDATE:
353 	case MAGIC_TYPE_LELONG:
354 	case MAGIC_TYPE_ULELONG:
355 	case MAGIC_TYPE_LEFLOAT:
356 	case MAGIC_TYPE_LEDATE:
357 	case MAGIC_TYPE_LELDATE:
358 	case MAGIC_TYPE_ULEDATE:
359 	case MAGIC_TYPE_ULELDATE:
360 	case MAGIC_TYPE_MELONG:
361 	case MAGIC_TYPE_MEDATE:
362 	case MAGIC_TYPE_MELDATE:
363 		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
364 		break;
365 	case MAGIC_TYPE_STRING:
366 	case MAGIC_TYPE_PSTRING:
367 		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
368 		break;
369 	case MAGIC_TYPE_BESTRING16:
370 	case MAGIC_TYPE_LESTRING16:
371 		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
372 		break;
373 	case MAGIC_TYPE_REGEX:
374 	case MAGIC_TYPE_SEARCH:
375 		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
376 		if (size < 1)
377 			size = 1;
378 		n += ml->test_string_size * size;
379 		break;
380 	}
381 	switch (ml->test_operator) {
382 	case '=':
383 		n += MAGIC_STRENGTH_MULTIPLIER;
384 		break;
385 	case '<':
386 	case '>':
387 	case '[':
388 	case ']':
389 		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
390 		break;
391 	case '^':
392 	case '&':
393 		n -= MAGIC_STRENGTH_MULTIPLIER;
394 		break;
395 	}
396 
397 skip:
398 	switch (ml->strength_operator) {
399 	case '+':
400 		n += ml->strength_value;
401 		break;
402 	case '-':
403 		n -= ml->strength_value;
404 		break;
405 	case '*':
406 		n *= ml->strength_value;
407 		break;
408 	case '/':
409 		n /= ml->strength_value;
410 		break;
411 	}
412 	return (n <= 0 ? 1 : n);
413 }
414 
415 static int
416 magic_get_string(char **line, char *out, size_t *outlen)
417 {
418 	char	*start, *cp, c;
419 	int	 d0, d1, d2;
420 
421 	start = out;
422 	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
423 		if (*cp != '\\') {
424 			*out++ = *cp;
425 			continue;
426 		}
427 
428 		switch (c = *++cp) {
429 		case '\0': /* end of line */
430 			return (-1);
431 		case ' ':
432 			*out++ = ' ';
433 			break;
434 		case '0':
435 		case '1':
436 		case '2':
437 		case '3':
438 		case '4':
439 		case '5':
440 		case '6':
441 		case '7':
442 			d0 = magic_odigit(cp[0]);
443 			if (cp[0] != '\0')
444 				d1 = magic_odigit(cp[1]);
445 			else
446 				d1 = -1;
447 			if (cp[0] != '\0' && cp[1] != '\0')
448 				d2 = magic_odigit(cp[2]);
449 			else
450 				d2 = -1;
451 
452 			if (d0 != -1 && d1 != -1 && d2 != -1) {
453 				*out = d2 | (d1 << 3) | (d0 << 6);
454 				cp += 2;
455 			} else if (d0 != -1 && d1 != -1) {
456 				*out = d1 | (d0 << 3);
457 				cp++;
458 			} else if (d0 != -1)
459 				*out = d0;
460 			else
461 				return (-1);
462 			out++;
463 			break;
464 		case 'x':
465 			d0 = magic_xdigit(cp[1]);
466 			if (cp[1] != '\0')
467 				d1 = magic_xdigit(cp[2]);
468 			else
469 				d1 = -1;
470 
471 			if (d0 != -1 && d1 != -1) {
472 				*out = d1 | (d0 << 4);
473 				cp += 2;
474 			} else if (d0 != -1) {
475 				*out = d0;
476 				cp++;
477 			} else
478 				return (-1);
479 			out++;
480 
481 			break;
482 		case 'a':
483 			*out++ = '\a';
484 			break;
485 		case 'b':
486 			*out++ = '\b';
487 			break;
488 		case 't':
489 			*out++ = '\t';
490 			break;
491 		case 'f':
492 			*out++ = '\f';
493 			break;
494 		case 'n':
495 			*out++ = '\n';
496 			break;
497 		case 'r':
498 			*out++ = '\r';
499 			break;
500 		case '\\':
501 			*out++ = '\\';
502 			break;
503 		case '\'':
504 			*out++ = '\'';
505 			break;
506 		case '\"':
507 			*out++ = '\"';
508 			break;
509 		default:
510 			*out++ = c;
511 			break;
512 		}
513 	}
514 	*out = '\0';
515 	*outlen = out - start;
516 
517 	*line = cp;
518 	return (0);
519 }
520 
521 static int
522 magic_parse_offset(struct magic_line *ml, char **line)
523 {
524 	char	*copy, *s, *cp, *endptr;
525 
526 	while (isspace((u_char)**line))
527 		(*line)++;
528 	copy = s = cp = xmalloc(strlen(*line) + 1);
529 	while (**line != '\0' && !isspace((u_char)**line))
530 		*cp++ = *(*line)++;
531 	*cp = '\0';
532 
533 	ml->offset = 0;
534 	ml->offset_relative = 0;
535 
536 	ml->indirect_type = ' ';
537 	ml->indirect_relative = 0;
538 	ml->indirect_offset = 0;
539 	ml->indirect_operator = ' ';
540 	ml->indirect_operand = 0;
541 
542 	if (*s == '&') {
543 		ml->offset_relative = 1;
544 		s++;
545 	}
546 
547 	if (*s != '(') {
548 		endptr = magic_strtoll(s, &ml->offset);
549 		if (endptr == NULL || *endptr != '\0') {
550 			magic_warn(ml, "missing closing bracket");
551 			goto fail;
552 		}
553 		if (ml->offset < 0 && !ml->offset_relative) {
554 			magic_warn(ml, "negative absolute offset");
555 			goto fail;
556 		}
557 		goto done;
558 	}
559 	s++;
560 
561 	if (*s == '&') {
562 		ml->indirect_relative = 1;
563 		s++;
564 	}
565 
566 	endptr = magic_strtoll(s, &ml->indirect_offset);
567 	if (endptr == NULL) {
568 		magic_warn(ml, "can't parse offset: %s", s);
569 		goto fail;
570 	}
571 	s = endptr;
572 	if (*s == ')')
573 		goto done;
574 
575 	if (*s == '.') {
576 		s++;
577 		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
578 			magic_warn(ml, "unknown offset type: %c", *s);
579 			goto fail;
580 		}
581 		ml->indirect_type = *s;
582 		s++;
583 		if (*s == ')')
584 			goto done;
585 	}
586 
587 	if (*s == '\0' || strchr("+-*", *s) == NULL) {
588 		magic_warn(ml, "unknown offset operator: %c", *s);
589 		goto fail;
590 	}
591 	ml->indirect_operator = *s;
592 	s++;
593 	if (*s == ')')
594 		goto done;
595 
596 	if (*s == '(') {
597 		s++;
598 		endptr = magic_strtoll(s, &ml->indirect_operand);
599 		if (endptr == NULL || *endptr != ')') {
600 			magic_warn(ml, "missing closing bracket");
601 			goto fail;
602 		}
603 		if (*++endptr != ')') {
604 			magic_warn(ml, "missing closing bracket");
605 			goto fail;
606 		}
607 	} else {
608 		endptr = magic_strtoll(s, &ml->indirect_operand);
609 		if (endptr == NULL || *endptr != ')') {
610 			magic_warn(ml, "missing closing bracket");
611 			goto fail;
612 		}
613 	}
614 
615 done:
616 	free(copy);
617 	return (0);
618 
619 fail:
620 	free(copy);
621 	return (-1);
622 }
623 
624 static int
625 magic_parse_type(struct magic_line *ml, char **line)
626 {
627 	char	*copy, *s, *cp, *endptr;
628 
629 	while (isspace((u_char)**line))
630 		(*line)++;
631 	copy = s = cp = xmalloc(strlen(*line) + 1);
632 	while (**line != '\0' && !isspace((u_char)**line))
633 		*cp++ = *(*line)++;
634 	*cp = '\0';
635 
636 	ml->type = MAGIC_TYPE_NONE;
637 	ml->type_operator = ' ';
638 	ml->type_operand = 0;
639 
640 	if (strcmp(s, "name") == 0) {
641 		ml->type = MAGIC_TYPE_NAME;
642 		ml->type_string = xstrdup(s);
643 		goto done;
644 	}
645 	if (strcmp(s, "use") == 0) {
646 		ml->type = MAGIC_TYPE_USE;
647 		ml->type_string = xstrdup(s);
648 		goto done;
649 	}
650 
651 	if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
652 	    strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
653 		if (*s == 'u')
654 			ml->type_string = xstrdup(s + 1);
655 		else
656 			ml->type_string = xstrdup(s);
657 		ml->type = MAGIC_TYPE_STRING;
658 		magic_mark_text(ml, 0);
659 		goto done;
660 	}
661 	if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
662 	    strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
663 		if (*s == 'u')
664 			ml->type_string = xstrdup(s + 1);
665 		else
666 			ml->type_string = xstrdup(s);
667 		ml->type = MAGIC_TYPE_PSTRING;
668 		magic_mark_text(ml, 0);
669 		goto done;
670 	}
671 	if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
672 	    strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
673 		if (*s == 'u')
674 			ml->type_string = xstrdup(s + 1);
675 		else
676 			ml->type_string = xstrdup(s);
677 		ml->type = MAGIC_TYPE_SEARCH;
678 		goto done;
679 	}
680 	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
681 	    strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
682 		if (*s == 'u')
683 			ml->type_string = xstrdup(s + 1);
684 		else
685 			ml->type_string = xstrdup(s);
686 		ml->type = MAGIC_TYPE_REGEX;
687 		goto done;
688 	}
689 	ml->type_string = xstrdup(s);
690 
691 	cp = &s[strcspn(s, "+-&/%*")];
692 	if (*cp != '\0') {
693 		ml->type_operator = *cp;
694 		endptr = magic_strtoull(cp + 1, &ml->type_operand);
695 		if (endptr == NULL || *endptr != '\0') {
696 			magic_warn(ml, "can't parse operand: %s", cp + 1);
697 			goto fail;
698 		}
699 		*cp = '\0';
700 	}
701 
702 	if (strcmp(s, "byte") == 0)
703 		ml->type = MAGIC_TYPE_BYTE;
704 	else if (strcmp(s, "short") == 0)
705 		ml->type = MAGIC_TYPE_SHORT;
706 	else if (strcmp(s, "long") == 0)
707 		ml->type = MAGIC_TYPE_LONG;
708 	else if (strcmp(s, "quad") == 0)
709 		ml->type = MAGIC_TYPE_QUAD;
710 	else if (strcmp(s, "ubyte") == 0)
711 		ml->type = MAGIC_TYPE_UBYTE;
712 	else if (strcmp(s, "ushort") == 0)
713 		ml->type = MAGIC_TYPE_USHORT;
714 	else if (strcmp(s, "ulong") == 0)
715 		ml->type = MAGIC_TYPE_ULONG;
716 	else if (strcmp(s, "uquad") == 0)
717 		ml->type = MAGIC_TYPE_UQUAD;
718 	else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
719 		ml->type = MAGIC_TYPE_FLOAT;
720 	else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
721 		ml->type = MAGIC_TYPE_DOUBLE;
722 	else if (strcmp(s, "date") == 0)
723 		ml->type = MAGIC_TYPE_DATE;
724 	else if (strcmp(s, "qdate") == 0)
725 		ml->type = MAGIC_TYPE_QDATE;
726 	else if (strcmp(s, "ldate") == 0)
727 		ml->type = MAGIC_TYPE_LDATE;
728 	else if (strcmp(s, "qldate") == 0)
729 		ml->type = MAGIC_TYPE_QLDATE;
730 	else if (strcmp(s, "udate") == 0)
731 		ml->type = MAGIC_TYPE_UDATE;
732 	else if (strcmp(s, "uqdate") == 0)
733 		ml->type = MAGIC_TYPE_UQDATE;
734 	else if (strcmp(s, "uldate") == 0)
735 		ml->type = MAGIC_TYPE_ULDATE;
736 	else if (strcmp(s, "uqldate") == 0)
737 		ml->type = MAGIC_TYPE_UQLDATE;
738 	else if (strcmp(s, "beshort") == 0)
739 		ml->type = MAGIC_TYPE_BESHORT;
740 	else if (strcmp(s, "belong") == 0)
741 		ml->type = MAGIC_TYPE_BELONG;
742 	else if (strcmp(s, "bequad") == 0)
743 		ml->type = MAGIC_TYPE_BEQUAD;
744 	else if (strcmp(s, "ubeshort") == 0)
745 		ml->type = MAGIC_TYPE_UBESHORT;
746 	else if (strcmp(s, "ubelong") == 0)
747 		ml->type = MAGIC_TYPE_UBELONG;
748 	else if (strcmp(s, "ubequad") == 0)
749 		ml->type = MAGIC_TYPE_UBEQUAD;
750 	else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
751 		ml->type = MAGIC_TYPE_BEFLOAT;
752 	else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
753 		ml->type = MAGIC_TYPE_BEDOUBLE;
754 	else if (strcmp(s, "bedate") == 0)
755 		ml->type = MAGIC_TYPE_BEDATE;
756 	else if (strcmp(s, "beqdate") == 0)
757 		ml->type = MAGIC_TYPE_BEQDATE;
758 	else if (strcmp(s, "beldate") == 0)
759 		ml->type = MAGIC_TYPE_BELDATE;
760 	else if (strcmp(s, "beqldate") == 0)
761 		ml->type = MAGIC_TYPE_BEQLDATE;
762 	else if (strcmp(s, "ubedate") == 0)
763 		ml->type = MAGIC_TYPE_UBEDATE;
764 	else if (strcmp(s, "ubeqdate") == 0)
765 		ml->type = MAGIC_TYPE_UBEQDATE;
766 	else if (strcmp(s, "ubeldate") == 0)
767 		ml->type = MAGIC_TYPE_UBELDATE;
768 	else if (strcmp(s, "ubeqldate") == 0)
769 		ml->type = MAGIC_TYPE_UBEQLDATE;
770 	else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
771 		ml->type = MAGIC_TYPE_BESTRING16;
772 	else if (strcmp(s, "leshort") == 0)
773 		ml->type = MAGIC_TYPE_LESHORT;
774 	else if (strcmp(s, "lelong") == 0)
775 		ml->type = MAGIC_TYPE_LELONG;
776 	else if (strcmp(s, "lequad") == 0)
777 		ml->type = MAGIC_TYPE_LEQUAD;
778 	else if (strcmp(s, "uleshort") == 0)
779 		ml->type = MAGIC_TYPE_ULESHORT;
780 	else if (strcmp(s, "ulelong") == 0)
781 		ml->type = MAGIC_TYPE_ULELONG;
782 	else if (strcmp(s, "ulequad") == 0)
783 		ml->type = MAGIC_TYPE_ULEQUAD;
784 	else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
785 		ml->type = MAGIC_TYPE_LEFLOAT;
786 	else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
787 		ml->type = MAGIC_TYPE_LEDOUBLE;
788 	else if (strcmp(s, "ledate") == 0)
789 		ml->type = MAGIC_TYPE_LEDATE;
790 	else if (strcmp(s, "leqdate") == 0)
791 		ml->type = MAGIC_TYPE_LEQDATE;
792 	else if (strcmp(s, "leldate") == 0)
793 		ml->type = MAGIC_TYPE_LELDATE;
794 	else if (strcmp(s, "leqldate") == 0)
795 		ml->type = MAGIC_TYPE_LEQLDATE;
796 	else if (strcmp(s, "uledate") == 0)
797 		ml->type = MAGIC_TYPE_ULEDATE;
798 	else if (strcmp(s, "uleqdate") == 0)
799 		ml->type = MAGIC_TYPE_ULEQDATE;
800 	else if (strcmp(s, "uleldate") == 0)
801 		ml->type = MAGIC_TYPE_ULELDATE;
802 	else if (strcmp(s, "uleqldate") == 0)
803 		ml->type = MAGIC_TYPE_ULEQLDATE;
804 	else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
805 		ml->type = MAGIC_TYPE_LESTRING16;
806 	else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
807 		ml->type = MAGIC_TYPE_MELONG;
808 	else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
809 		ml->type = MAGIC_TYPE_MEDATE;
810 	else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
811 		ml->type = MAGIC_TYPE_MELDATE;
812 	else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
813 		ml->type = MAGIC_TYPE_DEFAULT;
814 	else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
815 		ml->type = MAGIC_TYPE_CLEAR;
816 	else {
817 		magic_warn(ml, "unknown type: %s", s);
818 		goto fail;
819 	}
820 	magic_mark_text(ml, 0);
821 
822 done:
823 	free(copy);
824 	return (0);
825 
826 fail:
827 	free(copy);
828 	return (-1);
829 }
830 
831 static int
832 magic_parse_value(struct magic_line *ml, char **line)
833 {
834 	char	*copy, *s, *cp, *endptr;
835 	size_t	 slen;
836 	uint64_t u;
837 
838 	while (isspace((u_char)**line))
839 		(*line)++;
840 
841 	ml->test_operator = '=';
842 	ml->test_not = 0;
843 	ml->test_string = NULL;
844 	ml->test_string_size = 0;
845 	ml->test_unsigned = 0;
846 	ml->test_signed = 0;
847 
848 	if (**line == '\0')
849 		return (0);
850 
851 	s = *line;
852 	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
853 		(*line)++;
854 		ml->test_operator = 'x';
855 		return (0);
856 	}
857 
858 	if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
859 		magic_warn(ml, "test specified for default or clear");
860 		ml->test_operator = 'x';
861 		return (0);
862 	}
863 
864 	if (**line == '!') {
865 		ml->test_not = 1;
866 		(*line)++;
867 	}
868 
869 	switch (ml->type) {
870 	case MAGIC_TYPE_NAME:
871 	case MAGIC_TYPE_USE:
872 		copy = s = xmalloc(strlen(*line) + 1);
873 		if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
874 			magic_warn(ml, "can't parse string");
875 			goto fail;
876 		}
877 		if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
878 			magic_warn(ml, "invalid name");
879 			goto fail;
880 		}
881 		ml->name = s;
882 		return (0); /* do not free */
883 	case MAGIC_TYPE_STRING:
884 	case MAGIC_TYPE_PSTRING:
885 	case MAGIC_TYPE_SEARCH:
886 		if (**line == '>' || **line == '<' || **line == '=') {
887 			ml->test_operator = **line;
888 			(*line)++;
889 		}
890 		/* FALLTHROUGH */
891 	case MAGIC_TYPE_REGEX:
892 		if (**line == '=')
893 			(*line)++;
894 		copy = s = xmalloc(strlen(*line) + 1);
895 		if (magic_get_string(line, s, &slen) != 0) {
896 			magic_warn(ml, "can't parse string");
897 			goto fail;
898 		}
899 		ml->test_string_size = slen;
900 		ml->test_string = s;
901 		return (0); /* do not free */
902 	default:
903 		break;
904 	}
905 
906 	while (isspace((u_char)**line))
907 		(*line)++;
908 	if ((*line)[0] == '<' && (*line)[1] == '=') {
909 		ml->test_operator = '[';
910 		(*line) += 2;
911 	} else if ((*line)[0] == '>' && (*line)[1] == '=') {
912 		ml->test_operator = ']';
913 		(*line) += 2;
914 	} else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
915 		ml->test_operator = **line;
916 		(*line)++;
917 	}
918 
919 	while (isspace((u_char)**line))
920 		(*line)++;
921 	copy = cp = xmalloc(strlen(*line) + 1);
922 	while (**line != '\0' && !isspace((u_char)**line))
923 		*cp++ = *(*line)++;
924 	*cp = '\0';
925 
926 	switch (ml->type) {
927 	case MAGIC_TYPE_FLOAT:
928 	case MAGIC_TYPE_DOUBLE:
929 	case MAGIC_TYPE_BEFLOAT:
930 	case MAGIC_TYPE_BEDOUBLE:
931 	case MAGIC_TYPE_LEFLOAT:
932 	case MAGIC_TYPE_LEDOUBLE:
933 		errno = 0;
934 		ml->test_double = strtod(copy, &endptr);
935 		if (errno == ERANGE)
936 			endptr = NULL;
937 		break;
938 	default:
939 		if (*ml->type_string == 'u')
940 			endptr = magic_strtoull(copy, &ml->test_unsigned);
941 		else {
942 			endptr = magic_strtoll(copy, &ml->test_signed);
943 			if (endptr == NULL || *endptr != '\0') {
944 				/*
945 				 * If we can't parse this as a signed number,
946 				 * try as unsigned instead.
947 				 */
948 				endptr = magic_strtoull(copy, &u);
949 				if (endptr != NULL && *endptr == '\0')
950 					ml->test_signed = (int64_t)u;
951 			}
952 		}
953 		break;
954 	}
955 	if (endptr == NULL || *endptr != '\0') {
956 		magic_warn(ml, "can't parse number: %s", copy);
957 		goto fail;
958 	}
959 
960 	free(copy);
961 	return (0);
962 
963 fail:
964 	free(copy);
965 	return (-1);
966 }
967 
968 int
969 magic_compare(struct magic_line *ml1, struct magic_line *ml2)
970 {
971 	if (ml1->strength < ml2->strength)
972 		return (1);
973 	if (ml1->strength > ml2->strength)
974 		return (-1);
975 
976 	/*
977 	 * The original file depends on the (undefined!) qsort(3) behaviour
978 	 * when the strength is equal. This is impossible to reproduce with an
979 	 * RB tree so just use the line number and hope for the best.
980 	 */
981 	if (ml1->line < ml2->line)
982 		return (-1);
983 	if (ml1->line > ml2->line)
984 		return (1);
985 
986 	return (0);
987 }
988 RB_GENERATE(magic_tree, magic_line, node, magic_compare);
989 
990 int
991 magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
992 {
993 	return (strcmp(ml1->name, ml2->name));
994 }
995 RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
996 
997 static void
998 magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
999     char *line)
1000 {
1001 	char	*cp, *s;
1002 	int64_t	 value;
1003 
1004 	cp = line + (sizeof "!:strength") - 1;
1005 	while (isspace((u_char)*cp))
1006 		cp++;
1007 	s = cp;
1008 
1009 	cp = strchr(s, '#');
1010 	if (cp != NULL)
1011 		*cp = '\0';
1012 	cp = s;
1013 
1014 	if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1015 		magic_warnm(m, at, "invalid strength operator: %s", s);
1016 		return;
1017 	}
1018 	ml->strength_operator = *cp++;
1019 
1020 	while (isspace((u_char)*cp))
1021 		cp++;
1022 	cp = magic_strtoll(cp, &value);
1023 	while (cp != NULL && isspace((u_char)*cp))
1024 		cp++;
1025 	if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1026 		magic_warnm(m, at, "invalid strength value: %s", s);
1027 		return;
1028 	}
1029 	ml->strength_value = value;
1030 }
1031 
1032 static void
1033 magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1034 {
1035 	char	*mimetype, *cp;
1036 
1037 	mimetype = line + (sizeof "!:mime") - 1;
1038 	while (isspace((u_char)*mimetype))
1039 		mimetype++;
1040 
1041 	cp = strchr(mimetype, '#');
1042 	if (cp != NULL)
1043 		*cp = '\0';
1044 
1045 	if (*mimetype != '\0') {
1046 		cp = mimetype + strlen(mimetype) - 1;
1047 		while (cp != mimetype && isspace((u_char)*cp))
1048 			*cp-- = '\0';
1049 	}
1050 
1051 	cp = mimetype;
1052 	while (*cp != '\0') {
1053 		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1054 			break;
1055 		cp++;
1056 	}
1057 	if (*mimetype == '\0' || *cp != '\0') {
1058 		magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1059 		return;
1060 	}
1061 	if (ml == NULL) {
1062 		magic_warnm(m, at, "stray MIME type: %s", mimetype);
1063 		return;
1064 	}
1065 	ml->mimetype = xstrdup(mimetype);
1066 }
1067 
1068 struct magic *
1069 magic_load(FILE *f, const char *path, int warnings)
1070 {
1071 	struct magic		*m;
1072 	struct magic_line	*ml = NULL, *parent, *parent0;
1073 	char			*line, *tmp;
1074 	size_t			 size;
1075 	ssize_t			 slen;
1076 	u_int			 at, level, n, i;
1077 
1078 	m = xcalloc(1, sizeof *m);
1079 	m->path = xstrdup(path);
1080 	m->warnings = warnings;
1081 	RB_INIT(&m->tree);
1082 
1083 	parent = NULL;
1084 	parent0 = NULL;
1085 	level = 0;
1086 
1087 	at = 0;
1088 	tmp = NULL;
1089 	size = 0;
1090 	while ((slen = getline(&tmp, &size, f)) != -1) {
1091 		line = tmp;
1092 		if (line[slen - 1] == '\n')
1093 			line[slen - 1] = '\0';
1094 
1095 		at++;
1096 
1097 		while (isspace((u_char)*line))
1098 		    line++;
1099 		if (*line == '\0' || *line == '#')
1100 			continue;
1101 
1102 		if (strncmp (line, "!:mime", 6) == 0) {
1103 			magic_set_mimetype(m, at, ml, line);
1104 			continue;
1105 		}
1106 		if (strncmp (line, "!:strength", 10) == 0) {
1107 			magic_adjust_strength(m, at, ml, line);
1108 			continue;
1109 		}
1110 		if (strncmp (line, "!:", 2) == 0) {
1111 			for (i = 0; i < 64 && line[i] != '\0'; i++) {
1112 				if (isspace((u_char)line[i]))
1113 					break;
1114 			}
1115 			magic_warnm(m, at, "%.*s not supported", i, line);
1116 			continue;
1117 		}
1118 
1119 		n = 0;
1120 		for (; *line == '>'; line++)
1121 			n++;
1122 
1123 		ml = xcalloc(1, sizeof *ml);
1124 		ml->root = m;
1125 		ml->line = at;
1126 		ml->type = MAGIC_TYPE_NONE;
1127 		TAILQ_INIT(&ml->children);
1128 		ml->text = 1;
1129 
1130 		/*
1131 		 * At this point n is the level we want, level is the current
1132 		 * level. parent0 is the last line at the same level and parent
1133 		 * is the last line at the previous level.
1134 		 */
1135 		if (n == level + 1) {
1136 			parent = parent0;
1137 		} else if (n < level) {
1138 			for (i = n; i < level && parent != NULL; i++)
1139 				parent = parent->parent;
1140 		} else if (n != level) {
1141 			magic_warn(ml, "level skipped (%u->%u)", level, n);
1142 			free(ml);
1143 			continue;
1144 		}
1145 		ml->parent = parent;
1146 		level = n;
1147 
1148 		if (magic_parse_offset(ml, &line) != 0 ||
1149 		    magic_parse_type(ml, &line) != 0 ||
1150 		    magic_parse_value(ml, &line) != 0 ||
1151 		    magic_set_result(ml, line) != 0) {
1152 			/*
1153 			 * An invalid line still needs to appear in the tree in
1154 			 * case it has any children.
1155 			 */
1156 			ml->type = MAGIC_TYPE_NONE;
1157 		}
1158 
1159 		ml->strength = magic_get_strength(ml);
1160 		if (ml->parent == NULL) {
1161 			if (ml->name != NULL)
1162 				RB_INSERT(magic_named_tree, &m->named, ml);
1163 			else
1164 				RB_INSERT(magic_tree, &m->tree, ml);
1165 		} else
1166 			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1167 		parent0 = ml;
1168 	}
1169 	free(tmp);
1170 	if (ferror(f))
1171 		err(1, "%s", path);
1172 
1173 	return (m);
1174 }
1175