1 /* $NetBSD: funcs.c,v 1.10 2015/01/02 21:15:32 christos Exp $ */
2
3 /*
4 * Copyright (c) Christos Zoulas 2003.
5 * All Rights Reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice immediately at the beginning of the file, without modification,
12 * this list of conditions, and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include "file.h"
30
31 #ifndef lint
32 #if 0
33 FILE_RCSID("@(#)$File: funcs.c,v 1.79 2014/12/16 20:52:49 christos Exp $")
34 #else
35 __RCSID("$NetBSD: funcs.c,v 1.10 2015/01/02 21:15:32 christos Exp $");
36 #endif
37 #endif /* lint */
38
39 #include "magic.h"
40 #include <assert.h>
41 #include <stdarg.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <ctype.h>
45 #if defined(HAVE_WCHAR_H)
46 #include <wchar.h>
47 #endif
48 #if defined(HAVE_WCTYPE_H)
49 #include <wctype.h>
50 #endif
51 #if defined(HAVE_LIMITS_H)
52 #include <limits.h>
53 #endif
54
55 #ifndef SIZE_MAX
56 #define SIZE_MAX ((size_t)~0)
57 #endif
58
59 /*
60 * Like printf, only we append to a buffer.
61 */
62 protected int
file_vprintf(struct magic_set * ms,const char * fmt,va_list ap)63 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
64 {
65 int len;
66 char *buf, *newstr;
67
68 if (ms->event_flags & EVENT_HAD_ERR)
69 return 0;
70 len = vasprintf(&buf, fmt, ap);
71 if (len < 0)
72 goto out;
73
74 if (ms->o.buf != NULL) {
75 len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
76 free(buf);
77 if (len < 0)
78 goto out;
79 free(ms->o.buf);
80 buf = newstr;
81 }
82 ms->o.buf = buf;
83 return 0;
84 out:
85 file_error(ms, errno, "vasprintf failed");
86 return -1;
87 }
88
89 protected int
file_printf(struct magic_set * ms,const char * fmt,...)90 file_printf(struct magic_set *ms, const char *fmt, ...)
91 {
92 int rv;
93 va_list ap;
94
95 va_start(ap, fmt);
96 rv = file_vprintf(ms, fmt, ap);
97 va_end(ap);
98 return rv;
99 }
100
101 /*
102 * error - print best error message possible
103 */
104 /*VARARGS*/
105 __attribute__((__format__(__printf__, 3, 0)))
106 private void
file_error_core(struct magic_set * ms,int error,const char * f,va_list va,size_t lineno)107 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
108 size_t lineno)
109 {
110 /* Only the first error is ok */
111 if (ms->event_flags & EVENT_HAD_ERR)
112 return;
113 if (lineno != 0) {
114 free(ms->o.buf);
115 ms->o.buf = NULL;
116 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno);
117 }
118 file_vprintf(ms, f, va);
119 if (error > 0)
120 file_printf(ms, " (%s)", strerror(error));
121 ms->event_flags |= EVENT_HAD_ERR;
122 ms->error = error;
123 }
124
125 /*VARARGS*/
126 protected void
file_error(struct magic_set * ms,int error,const char * f,...)127 file_error(struct magic_set *ms, int error, const char *f, ...)
128 {
129 va_list va;
130 va_start(va, f);
131 file_error_core(ms, error, f, va, 0);
132 va_end(va);
133 }
134
135 /*
136 * Print an error with magic line number.
137 */
138 /*VARARGS*/
139 protected void
file_magerror(struct magic_set * ms,const char * f,...)140 file_magerror(struct magic_set *ms, const char *f, ...)
141 {
142 va_list va;
143 va_start(va, f);
144 file_error_core(ms, 0, f, va, ms->line);
145 va_end(va);
146 }
147
148 protected void
file_oomem(struct magic_set * ms,size_t len)149 file_oomem(struct magic_set *ms, size_t len)
150 {
151 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes",
152 len);
153 }
154
155 protected void
file_badseek(struct magic_set * ms)156 file_badseek(struct magic_set *ms)
157 {
158 file_error(ms, errno, "error seeking");
159 }
160
161 protected void
file_badread(struct magic_set * ms)162 file_badread(struct magic_set *ms)
163 {
164 file_error(ms, errno, "error reading");
165 }
166
167 #ifndef COMPILE_ONLY
168 /*ARGSUSED*/
169 protected int
file_buffer(struct magic_set * ms,int fd,const char * inname,const void * buf,size_t nb)170 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)),
171 const void *buf, size_t nb)
172 {
173 int m = 0, rv = 0, looks_text = 0;
174 int mime = ms->flags & MAGIC_MIME;
175 const unsigned char *ubuf = CAST(const unsigned char *, buf);
176 unichar *u8buf = NULL;
177 size_t ulen;
178 const char *code = NULL;
179 const char *code_mime = "binary";
180 const char *type = "application/octet-stream";
181 const char *def = "data";
182 const char *ftype = NULL;
183
184 if (nb == 0) {
185 def = "empty";
186 type = "application/x-empty";
187 goto simple;
188 } else if (nb == 1) {
189 def = "very short file (no magic)";
190 goto simple;
191 }
192
193 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
194 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
195 &code, &code_mime, &ftype);
196 }
197
198 #ifdef __EMX__
199 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
200 switch (file_os2_apptype(ms, inname, buf, nb)) {
201 case -1:
202 return -1;
203 case 0:
204 break;
205 default:
206 return 1;
207 }
208 }
209 #endif
210 #if HAVE_FORK
211 /* try compression stuff */
212 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0)
213 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) {
214 if ((ms->flags & MAGIC_DEBUG) != 0)
215 (void)fprintf(stderr, "zmagic %d\n", m);
216 goto done_encoding;
217 }
218 #endif
219 /* Check if we have a tar file */
220 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0)
221 if ((m = file_is_tar(ms, ubuf, nb)) != 0) {
222 if ((ms->flags & MAGIC_DEBUG) != 0)
223 (void)fprintf(stderr, "tar %d\n", m);
224 goto done;
225 }
226
227 /* Check if we have a CDF file */
228 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0)
229 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) {
230 if ((ms->flags & MAGIC_DEBUG) != 0)
231 (void)fprintf(stderr, "cdf %d\n", m);
232 goto done;
233 }
234
235 /* try soft magic tests */
236 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
237 if ((m = file_softmagic(ms, ubuf, nb, 0, NULL, BINTEST,
238 looks_text)) != 0) {
239 if ((ms->flags & MAGIC_DEBUG) != 0)
240 (void)fprintf(stderr, "softmagic %d\n", m);
241 #ifdef BUILTIN_ELF
242 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
243 nb > 5 && fd != -1) {
244 /*
245 * We matched something in the file, so this
246 * *might* be an ELF file, and the file is at
247 * least 5 bytes long, so if it's an ELF file
248 * it has at least one byte past the ELF magic
249 * number - try extracting information from the
250 * ELF headers that cannot easily * be
251 * extracted with rules in the magic file.
252 */
253 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0)
254 if ((ms->flags & MAGIC_DEBUG) != 0)
255 (void)fprintf(stderr,
256 "elf %d\n", m);
257 }
258 #endif
259 goto done;
260 }
261
262 /* try text properties */
263 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
264
265 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) {
266 if ((ms->flags & MAGIC_DEBUG) != 0)
267 (void)fprintf(stderr, "ascmagic %d\n", m);
268 goto done;
269 }
270 }
271
272 simple:
273 /* give up */
274 m = 1;
275 if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
276 file_printf(ms, "%s", mime ? type : def) == -1) {
277 rv = -1;
278 }
279 done:
280 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
281 if (ms->flags & MAGIC_MIME_TYPE)
282 if (file_printf(ms, "; charset=") == -1)
283 rv = -1;
284 if (file_printf(ms, "%s", code_mime) == -1)
285 rv = -1;
286 }
287 #if HAVE_FORK
288 done_encoding:
289 #endif
290 free(u8buf);
291 if (rv)
292 return rv;
293
294 return m;
295 }
296 #endif
297
298 protected int
file_reset(struct magic_set * ms)299 file_reset(struct magic_set *ms)
300 {
301 if (ms->mlist[0] == NULL) {
302 file_error(ms, 0, "no magic files loaded");
303 return -1;
304 }
305 if (ms->o.buf) {
306 free(ms->o.buf);
307 ms->o.buf = NULL;
308 }
309 if (ms->o.pbuf) {
310 free(ms->o.pbuf);
311 ms->o.pbuf = NULL;
312 }
313 ms->event_flags &= ~EVENT_HAD_ERR;
314 ms->error = -1;
315 return 0;
316 }
317
/*
 * Write the four characters "\ooo" (backslash plus three octal digits
 * for the byte at *o) through n, advancing n by four and o by one.
 * Both arguments are evaluated several times, so they must be plain
 * pointer lvalues without side effects.
 */
#define OCTALIFY(n, o) \
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	    *(n)++ = (char)('0' + (((uint32_t)*(o) >> 6) & 3)), \
	    *(n)++ = (char)('0' + (((uint32_t)*(o) >> 3) & 7)), \
	    *(n)++ = (char)('0' + ((uint32_t)*(o) & 7)), \
	    (o)++)
325
326 protected const char *
file_getbuffer(struct magic_set * ms)327 file_getbuffer(struct magic_set *ms)
328 {
329 char *pbuf, *op, *np;
330 size_t psize, len;
331
332 if (ms->event_flags & EVENT_HAD_ERR)
333 return NULL;
334
335 if (ms->flags & MAGIC_RAW)
336 return ms->o.buf;
337
338 if (ms->o.buf == NULL)
339 return NULL;
340
341 /* * 4 is for octal representation, + 1 is for NUL */
342 len = strlen(ms->o.buf);
343 if (len > (SIZE_MAX - 1) / 4) {
344 file_oomem(ms, len);
345 return NULL;
346 }
347 psize = len * 4 + 1;
348 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) {
349 file_oomem(ms, psize);
350 return NULL;
351 }
352 ms->o.pbuf = pbuf;
353
354 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
355 {
356 mbstate_t state;
357 wchar_t nextchar;
358 int mb_conv = 1;
359 size_t bytesconsumed;
360 char *eop;
361 (void)memset(&state, 0, sizeof(mbstate_t));
362
363 np = ms->o.pbuf;
364 op = ms->o.buf;
365 eop = op + len;
366
367 while (op < eop) {
368 bytesconsumed = mbrtowc(&nextchar, op,
369 (size_t)(eop - op), &state);
370 if (bytesconsumed == (size_t)(-1) ||
371 bytesconsumed == (size_t)(-2)) {
372 mb_conv = 0;
373 break;
374 }
375
376 if (iswprint(nextchar)) {
377 (void)memcpy(np, op, bytesconsumed);
378 op += bytesconsumed;
379 np += bytesconsumed;
380 } else {
381 while (bytesconsumed-- > 0)
382 OCTALIFY(np, op);
383 }
384 }
385 *np = '\0';
386
387 /* Parsing succeeded as a multi-byte sequence */
388 if (mb_conv != 0)
389 return ms->o.pbuf;
390 }
391 #endif
392
393 for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
394 if (isprint((unsigned char)*op)) {
395 *np++ = *op++;
396 } else {
397 OCTALIFY(np, op);
398 }
399 }
400 *np = '\0';
401 return ms->o.pbuf;
402 }
403
404 protected int
file_check_mem(struct magic_set * ms,unsigned int level)405 file_check_mem(struct magic_set *ms, unsigned int level)
406 {
407 size_t len;
408
409 if (level >= ms->c.len) {
410 len = (ms->c.len += 20) * sizeof(*ms->c.li);
411 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ?
412 malloc(len) :
413 realloc(ms->c.li, len));
414 if (ms->c.li == NULL) {
415 file_oomem(ms, len);
416 return -1;
417 }
418 }
419 ms->c.li[level].got_match = 0;
420 #ifdef ENABLE_CONDITIONALS
421 ms->c.li[level].last_match = 0;
422 ms->c.li[level].last_cond = COND_NONE;
423 #endif /* ENABLE_CONDITIONALS */
424 return 0;
425 }
426
427 protected size_t
file_printedlen(const struct magic_set * ms)428 file_printedlen(const struct magic_set *ms)
429 {
430 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf);
431 }
432
433 protected int
file_replace(struct magic_set * ms,const char * pat,const char * rep)434 file_replace(struct magic_set *ms, const char *pat, const char *rep)
435 {
436 file_regex_t rx;
437 int rc, rv = -1;
438
439 rc = file_regcomp(&rx, pat, REG_EXTENDED);
440 if (rc) {
441 file_regerror(&rx, rc, ms);
442 } else {
443 regmatch_t rm;
444 int nm = 0;
445 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) {
446 ms->o.buf[rm.rm_so] = '\0';
447 if (file_printf(ms, "%s%s", rep,
448 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1)
449 goto out;
450 nm++;
451 }
452 rv = nm;
453 }
454 out:
455 file_regfree(&rx);
456 return rv;
457 }
458
459 protected int
file_regcomp(file_regex_t * rx,const char * pat,int flags)460 file_regcomp(file_regex_t *rx, const char *pat, int flags)
461 {
462 #ifdef USE_C_LOCALE
463 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
464 assert(rx->c_lc_ctype != NULL);
465 rx->old_lc_ctype = uselocale(rx->c_lc_ctype);
466 assert(rx->old_lc_ctype != NULL);
467 #endif
468 rx->pat = pat;
469
470 return rx->rc = regcomp(&rx->rx, pat, flags);
471 }
472
473 protected int
file_regexec(file_regex_t * rx,const char * str,size_t nmatch,regmatch_t * pmatch,int eflags)474 file_regexec(file_regex_t *rx, const char *str, size_t nmatch,
475 regmatch_t* pmatch, int eflags)
476 {
477 assert(rx->rc == 0);
478 return regexec(&rx->rx, str, nmatch, pmatch, eflags);
479 }
480
481 protected void
file_regfree(file_regex_t * rx)482 file_regfree(file_regex_t *rx)
483 {
484 if (rx->rc == 0)
485 regfree(&rx->rx);
486 #ifdef USE_C_LOCALE
487 (void)uselocale(rx->old_lc_ctype);
488 freelocale(rx->c_lc_ctype);
489 #endif
490 }
491
492 protected void
file_regerror(file_regex_t * rx,int rc,struct magic_set * ms)493 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms)
494 {
495 char errmsg[512];
496
497 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg));
498 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat,
499 errmsg);
500 }
501
502 protected file_pushbuf_t *
file_push_buffer(struct magic_set * ms)503 file_push_buffer(struct magic_set *ms)
504 {
505 file_pushbuf_t *pb;
506
507 if (ms->event_flags & EVENT_HAD_ERR)
508 return NULL;
509
510 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL)
511 return NULL;
512
513 pb->buf = ms->o.buf;
514 pb->offset = ms->offset;
515
516 ms->o.buf = NULL;
517 ms->offset = 0;
518
519 return pb;
520 }
521
522 protected char *
file_pop_buffer(struct magic_set * ms,file_pushbuf_t * pb)523 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
524 {
525 char *rbuf;
526
527 if (ms->event_flags & EVENT_HAD_ERR) {
528 free(pb->buf);
529 free(pb);
530 return NULL;
531 }
532
533 rbuf = ms->o.buf;
534
535 ms->o.buf = pb->buf;
536 ms->offset = pb->offset;
537
538 free(pb);
539 return rbuf;
540 }
541
/*
 * Convert str to a printable ASCII string in buf.
 *
 * Printable characters are copied as they are; every other byte is
 * written as a "\ooo" octal escape.  At most bufsiz bytes are written,
 * including the terminating NUL.  The conversion stops at the end of
 * str, when buf is full, or when an escape no longer fits (an escape is
 * never split).
 *
 * A bufsiz of 0 means there is no room even for the terminator, so buf
 * is not written at all.  Returns buf.
 */
protected char *
file_printable(char *buf, size_t bufsiz, const char *str)
{
	const unsigned char *s = (const unsigned char *)str;
	char *ptr = buf;
	size_t room;

	if (bufsiz == 0)
		return buf;

	/*
	 * `room' counts the bytes still available for text; one byte is
	 * reserved for the NUL.  Counting instead of comparing against an
	 * end pointer avoids forming pointers outside of buf.
	 */
	for (room = bufsiz - 1; room > 0 && *s != '\0'; s++) {
		if (isprint(*s)) {
			*ptr++ = (char)*s;
			room--;
			continue;
		}
		/* An escape takes four bytes; do not emit a partial one. */
		if (room < 4)
			break;
		*ptr++ = '\\';
		*ptr++ = (char)('0' + (((unsigned int)*s >> 6) & 7));
		*ptr++ = (char)('0' + (((unsigned int)*s >> 3) & 7));
		*ptr++ = (char)('0' + ((unsigned int)*s & 7));
		room -= 4;
	}
	*ptr = '\0';
	return buf;
}
566