1 /* 2 * Copyright (c) Christos Zoulas 2003. 3 * All Rights Reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 #include "file.h" 28 29 #ifndef lint 30 FILE_RCSID("@(#)$File: funcs.c,v 1.60 2011/12/08 12:38:24 rrt Exp $") 31 #endif /* lint */ 32 33 #include "magic.h" 34 #include <stdarg.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <ctype.h> 38 #if defined(HAVE_WCHAR_H) 39 #include <wchar.h> 40 #endif 41 #if defined(HAVE_WCTYPE_H) 42 #include <wctype.h> 43 #endif 44 #if defined(HAVE_LIMITS_H) 45 #include <limits.h> 46 #endif 47 48 #ifndef SIZE_MAX 49 #define SIZE_MAX ((size_t)~0) 50 #endif 51 52 /* 53 * Like printf, only we append to a buffer. 54 */ 55 protected int 56 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 57 { 58 int len; 59 char *buf, *newstr; 60 61 len = vasprintf(&buf, fmt, ap); 62 if (len < 0) 63 goto out; 64 65 if (ms->o.buf != NULL) { 66 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 67 free(buf); 68 if (len < 0) 69 goto out; 70 free(ms->o.buf); 71 buf = newstr; 72 } 73 ms->o.buf = buf; 74 return 0; 75 out: 76 file_error(ms, errno, "vasprintf failed"); 77 return -1; 78 } 79 80 protected int 81 file_printf(struct magic_set *ms, const char *fmt, ...) 82 { 83 int rv; 84 va_list ap; 85 86 va_start(ap, fmt); 87 rv = file_vprintf(ms, fmt, ap); 88 va_end(ap); 89 return rv; 90 } 91 92 /* 93 * error - print best error message possible 94 */ 95 /*VARARGS*/ 96 private void 97 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 98 size_t lineno) 99 { 100 /* Only the first error is ok */ 101 if (ms->event_flags & EVENT_HAD_ERR) 102 return; 103 if (lineno != 0) { 104 free(ms->o.buf); 105 ms->o.buf = NULL; 106 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 107 } 108 file_vprintf(ms, f, va); 109 if (error > 0) 110 file_printf(ms, " (%s)", strerror(error)); 111 ms->event_flags |= EVENT_HAD_ERR; 112 ms->error = error; 113 } 114 115 /*VARARGS*/ 116 protected void 117 file_error(struct magic_set *ms, int error, const char *f, ...) 118 { 119 va_list va; 120 va_start(va, f); 121 file_error_core(ms, error, f, va, 0); 122 va_end(va); 123 } 124 125 /* 126 * Print an error with magic line number. 127 */ 128 /*VARARGS*/ 129 protected void 130 file_magerror(struct magic_set *ms, const char *f, ...) 131 { 132 va_list va; 133 va_start(va, f); 134 file_error_core(ms, 0, f, va, ms->line); 135 va_end(va); 136 } 137 138 protected void 139 file_oomem(struct magic_set *ms, size_t len) 140 { 141 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 142 len); 143 } 144 145 protected void 146 file_badseek(struct magic_set *ms) 147 { 148 file_error(ms, errno, "error seeking"); 149 } 150 151 protected void 152 file_badread(struct magic_set *ms) 153 { 154 file_error(ms, errno, "error reading"); 155 } 156 157 #ifndef COMPILE_ONLY 158 protected int 159 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unused)), 160 const void *buf, size_t nb) 161 { 162 int m = 0, rv = 0, looks_text = 0; 163 int mime = ms->flags & MAGIC_MIME; 164 const unsigned char *ubuf = CAST(const unsigned char *, buf); 165 unichar *u8buf = NULL; 166 size_t ulen; 167 const char *code = NULL; 168 const char *code_mime = "binary"; 169 const char *type = NULL; 170 171 172 173 if (nb == 0) { 174 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 175 file_printf(ms, mime ? "application/x-empty" : 176 "empty") == -1) 177 return -1; 178 return 1; 179 } else if (nb == 1) { 180 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 181 file_printf(ms, mime ? "application/octet-stream" : 182 "very short file (no magic)") == -1) 183 return -1; 184 return 1; 185 } 186 187 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 188 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 189 &code, &code_mime, &type); 190 } 191 192 #ifdef __EMX__ 193 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 194 switch (file_os2_apptype(ms, inname, buf, nb)) { 195 case -1: 196 return -1; 197 case 0: 198 break; 199 default: 200 return 1; 201 } 202 } 203 #endif 204 #if HAVE_FORK 205 /* try compression stuff */ 206 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 207 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 208 if ((ms->flags & MAGIC_DEBUG) != 0) 209 (void)fprintf(stderr, "zmagic %d\n", m); 210 goto done; 211 } 212 #endif 213 /* Check if we have a tar file */ 214 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 215 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 216 if ((ms->flags & MAGIC_DEBUG) != 0) 217 (void)fprintf(stderr, "tar %d\n", m); 218 goto done; 219 } 220 221 /* Check if we have a CDF file */ 222 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 223 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 224 if ((ms->flags & MAGIC_DEBUG) != 0) 225 (void)fprintf(stderr, "cdf %d\n", m); 226 goto done; 227 } 228 229 /* try soft magic tests */ 230 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 231 if ((m = file_softmagic(ms, ubuf, nb, BINTEST, 232 looks_text)) != 0) { 233 if ((ms->flags & MAGIC_DEBUG) != 0) 234 (void)fprintf(stderr, "softmagic %d\n", m); 235 #ifdef BUILTIN_ELF 236 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 237 nb > 5 && fd != -1) { 238 /* 239 * We matched something in the file, so this 240 * *might* be an ELF file, and the file is at 241 * least 5 bytes long, so if it's an ELF file 242 * it has at least one byte past the ELF magic 243 * number - try extracting information from the 244 * ELF headers that cannot easily * be 245 * extracted with rules in the magic file. 246 */ 247 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 248 if ((ms->flags & MAGIC_DEBUG) != 0) 249 (void)fprintf(stderr, 250 "elf %d\n", m); 251 } 252 #endif 253 goto done; 254 } 255 256 /* try text properties */ 257 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 258 259 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 260 if ((ms->flags & MAGIC_DEBUG) != 0) 261 (void)fprintf(stderr, "ascmagic %d\n", m); 262 goto done; 263 } 264 265 /* try to discover text encoding */ 266 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 267 if (looks_text == 0) 268 if ((m = file_ascmagic_with_encoding( ms, ubuf, 269 nb, u8buf, ulen, code, type, looks_text)) 270 != 0) { 271 if ((ms->flags & MAGIC_DEBUG) != 0) 272 (void)fprintf(stderr, 273 "ascmagic/enc %d\n", m); 274 goto done; 275 } 276 } 277 } 278 279 /* give up */ 280 m = 1; 281 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 282 file_printf(ms, mime ? "application/octet-stream" : "data") == -1) { 283 rv = -1; 284 } 285 done: 286 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 287 if (ms->flags & MAGIC_MIME_TYPE) 288 if (file_printf(ms, "; charset=") == -1) 289 rv = -1; 290 if (file_printf(ms, "%s", code_mime) == -1) 291 rv = -1; 292 } 293 free(u8buf); 294 if (rv) 295 return rv; 296 297 return m; 298 } 299 #endif 300 301 protected int 302 file_reset(struct magic_set *ms) 303 { 304 if (ms->mlist == NULL) { 305 file_error(ms, 0, "no magic files loaded"); 306 return -1; 307 } 308 if (ms->o.buf) { 309 free(ms->o.buf); 310 ms->o.buf = NULL; 311 } 312 if (ms->o.pbuf) { 313 free(ms->o.pbuf); 314 ms->o.pbuf = NULL; 315 } 316 ms->event_flags &= ~EVENT_HAD_ERR; 317 ms->error = -1; 318 return 0; 319 } 320 321 #define OCTALIFY(n, o) \ 322 /*LINTED*/ \ 323 (void)(*(n)++ = '\\', \ 324 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 325 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 326 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 327 (o)++) 328 329 protected const char * 330 file_getbuffer(struct magic_set *ms) 331 { 332 char *pbuf, *op, *np; 333 size_t psize, len; 334 335 if (ms->event_flags & EVENT_HAD_ERR) 336 return NULL; 337 338 if (ms->flags & MAGIC_RAW) 339 return ms->o.buf; 340 341 if (ms->o.buf == NULL) 342 return NULL; 343 344 /* * 4 is for octal representation, + 1 is for NUL */ 345 len = strlen(ms->o.buf); 346 if (len > (SIZE_MAX - 1) / 4) { 347 file_oomem(ms, len); 348 return NULL; 349 } 350 psize = len * 4 + 1; 351 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 352 file_oomem(ms, psize); 353 return NULL; 354 } 355 ms->o.pbuf = pbuf; 356 357 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 358 { 359 mbstate_t state; 360 wchar_t nextchar; 361 int mb_conv = 1; 362 size_t bytesconsumed; 363 char *eop; 364 (void)memset(&state, 0, sizeof(mbstate_t)); 365 366 np = ms->o.pbuf; 367 op = ms->o.buf; 368 eop = op + len; 369 370 while (op < eop) { 371 bytesconsumed = mbrtowc(&nextchar, op, 372 (size_t)(eop - op), &state); 373 if (bytesconsumed == (size_t)(-1) || 374 bytesconsumed == (size_t)(-2)) { 375 mb_conv = 0; 376 break; 377 } 378 379 if (iswprint(nextchar)) { 380 (void)memcpy(np, op, bytesconsumed); 381 op += bytesconsumed; 382 np += bytesconsumed; 383 } else { 384 while (bytesconsumed-- > 0) 385 OCTALIFY(np, op); 386 } 387 } 388 *np = '\0'; 389 390 /* Parsing succeeded as a multi-byte sequence */ 391 if (mb_conv != 0) 392 return ms->o.pbuf; 393 } 394 #endif 395 396 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 397 if (isprint((unsigned char)*op)) { 398 *np++ = *op++; 399 } else { 400 OCTALIFY(np, op); 401 } 402 } 403 *np = '\0'; 404 return ms->o.pbuf; 405 } 406 407 protected int 408 file_check_mem(struct magic_set *ms, unsigned int level) 409 { 410 size_t len; 411 412 if (level >= ms->c.len) { 413 len = (ms->c.len += 20) * sizeof(*ms->c.li); 414 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 415 malloc(len) : 416 realloc(ms->c.li, len)); 417 if (ms->c.li == NULL) { 418 file_oomem(ms, len); 419 return -1; 420 } 421 } 422 ms->c.li[level].got_match = 0; 423 #ifdef ENABLE_CONDITIONALS 424 ms->c.li[level].last_match = 0; 425 ms->c.li[level].last_cond = COND_NONE; 426 #endif /* ENABLE_CONDITIONALS */ 427 return 0; 428 } 429 430 protected size_t 431 file_printedlen(const struct magic_set *ms) 432 { 433 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 434 } 435 436 protected int 437 file_replace(struct magic_set *ms, const char *pat, const char *rep) 438 { 439 regex_t rx; 440 int rc; 441 442 rc = regcomp(&rx, pat, REG_EXTENDED); 443 if (rc) { 444 char errmsg[512]; 445 (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); 446 file_magerror(ms, "regex error %d, (%s)", rc, errmsg); 447 return -1; 448 } else { 449 regmatch_t rm; 450 int nm = 0; 451 while (regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 452 ms->o.buf[rm.rm_so] = '\0'; 453 if (file_printf(ms, "%s%s", rep, 454 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 455 return -1; 456 nm++; 457 } 458 regfree(&rx); 459 return nm; 460 } 461 } 462