1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * mod_mime_magic: MIME type lookup via file magic numbers
19 * Copyright (c) 1996-1997 Cisco Systems, Inc.
20 *
21 * This software was submitted by Cisco Systems to the Apache Software Foundation in July
22 * 1997. Future revisions and derivatives of this source code must
23 * acknowledge Cisco Systems as the original contributor of this module.
24 * All other licensing and usage conditions are those of the Apache Software Foundation.
25 *
26 * Some of this code is derived from the free version of the file command
27 * originally posted to comp.sources.unix. Copyright info for that program
28 * is included below as required.
29 * ---------------------------------------------------------------------------
30 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
31 *
32 * This software is not subject to any license of the American Telephone and
33 * Telegraph Company or of the Regents of the University of California.
34 *
35 * Permission is granted to anyone to use this software for any purpose on any
36 * computer system, and to alter it and redistribute it freely, subject to
37 * the following restrictions:
38 *
39 * 1. The author is not responsible for the consequences of use of this
40 * software, no matter how awful, even if they arise from flaws in it.
41 *
42 * 2. The origin of this software must not be misrepresented, either by
43 * explicit claim or by omission. Since few users ever read sources, credits
44 * must appear in the documentation.
45 *
46 * 3. Altered versions must be plainly marked as such, and must not be
47 * misrepresented as being the original software. Since few users ever read
48 * sources, credits must appear in the documentation.
49 *
50 * 4. This notice may not be removed or altered.
51 * -------------------------------------------------------------------------
52 *
53 * For compliance with Mr Darwin's terms: this has been very significantly
54 * modified from the free "file" command.
55 * - all-in-one file for compilation convenience when moving from one
56 * version of Apache to the next.
57 * - Memory allocation is done through the Apache API's apr_pool_t structure.
58 * - All functions have had necessary Apache API request or server
59 * structures passed to them where necessary to call other Apache API
60 * routines. (i.e. usually for logging, files, or memory allocation in
61 * itself or a called function.)
62 * - struct magic has been converted from an array to a single-ended linked
63 * list because it only grows one record at a time, it's only accessed
64 * sequentially, and the Apache API has no equivalent of realloc().
65 * - Functions have been changed to get their parameters from the server
66 * configuration instead of globals. (It should be reentrant now but has
67 * not been tested in a threaded environment.)
68 * - Places where it used to print results to stdout now saves them in a
69 * list where they're used to set the MIME type in the Apache request
70 * record.
71 * - Command-line flags have been removed since they will never be used here.
72 *
73 * Ian Kluft <ikluft@cisco.com>
74 * Engineering Information Framework
75 * Central Engineering
76 * Cisco Systems, Inc.
77 * San Jose, CA, USA
78 *
79 * Initial installation July/August 1996
80 * Misc bug fixes May 1997
81 * Submission to Apache Software Foundation July 1997
82 *
83 */
84
85 #include "apr.h"
86 #include "apr_strings.h"
87 #include "apr_lib.h"
88 #define APR_WANT_STRFUNC
89 #include "apr_want.h"
90
91 #if APR_HAVE_UNISTD_H
92 #include <unistd.h>
93 #endif
94
95 #include "ap_config.h"
96 #include "httpd.h"
97 #include "http_config.h"
98 #include "http_request.h"
99 #include "http_core.h"
100 #include "http_log.h"
101 #include "http_protocol.h"
102 #include "util_script.h"
103
104 /* ### this isn't set by configure? does anybody set this? */
105 #ifdef HAVE_UTIME_H
106 #include <utime.h>
107 #endif
108
109 /*
110 * data structures and related constants
111 */
112
/* prefix used in this module's log and error messages */
#define MODNAME        "mod_mime_magic"
/* set to non-zero to compile in the "#if MIME_MAGIC_DEBUG" logging blocks */
#define MIME_MAGIC_DEBUG        0

/* fallback MIME types for unrecognized binary / text content */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_TEXT_UNKNOWN    "text/plain"

/* size of the formatting buffer used by magic_rsl_printf() */
#define MAXMIMESTRING        256

/* HOWMANY must be at least 4096 to make gzip -dcq work */
#define HOWMANY  4096
/* SMALL_HOWMANY limits how much work we do to figure out text files */
#define SMALL_HOWMANY 1024
#define MAXDESC    50   /* max length of text description (struct magic.desc) */
#define MAXstring 64    /* max length of "string" types (VALUETYPE.s) */
127
/*
 * One rule read from the magic file.  Rules are chained through 'next' in
 * the order parse() appends them to the per-server list.
 */
struct magic {
    struct magic *next;     /* link to next entry */
    int lineno;             /* line number from magic file */

    short flag;             /* bit set of INDIR / UNSIGNED below */
#define INDIR    1          /* if '>(...)' appears,  */
#define UNSIGNED 2          /* comparison is unsigned */
    short cont_level;       /* level of ">" (number of leading '>' chars) */
    struct {
        char type;          /* byte short long */
        long offset;        /* offset from indirection */
    } in;
    long offset;            /* offset to magic number */
    unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
    char type;              /* int, short, long or string. */
    char vallen;            /* length of string value, if any */
/* values for the 'type' member (and for in.type) */
#define BYTE    1
#define SHORT    2
#define LONG    4
#define STRING    5
#define DATE    6
#define BESHORT    7
#define BELONG    8
#define BEDATE    9
#define LESHORT    10
#define LELONG    11
#define LEDATE    12
    union VALUETYPE {
        unsigned char b;
        unsigned short h;
        unsigned long l;
        char s[MAXstring];
        unsigned char hs[2];   /* 2 bytes of a fixed-endian "short" */
        unsigned char hl[4];   /* 4 bytes of a fixed-endian "long" */
    } value;                /* either number or string */
    unsigned long mask;     /* mask before comparison with value */
    char nospflag;          /* suppress space character */

    /* NOTE: this string is suspected of overrunning - find it! */
    char desc[MAXDESC];     /* description */
};
169
170 /*
171 * data structures for tar file recognition
172 * --------------------------------------------------------------------------
173 * Header file for public domain tar (tape archive) program.
174 *
175 * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John
176 * Gilmore, ihnp4!hoptoad!gnu.
177 *
178 * Header block on tape.
179 *
180 * I'm going to use traditional DP naming conventions here. A "block" is a big
181 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
182 * care about. Typically many "record"s fit into a "block".
183 */
#define RECORDSIZE    512   /* size in bytes of one tar record */
#define NAMSIZ    100       /* size of the name and linkname fields */
#define TUNMLEN    32       /* size of the uname field */
#define TGNMLEN    32       /* size of the gname field */

/*
 * One tar record, viewable either as RECORDSIZE raw bytes (charptr) or as
 * the fixed-width character fields of a tar header (header).
 */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];          /* compared against TMAGIC -- TODO confirm in is_tar() */
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};
208
209 /* The magic field is filled with this if uname and gname are valid. */
210 #define TMAGIC "ustar " /* 7 chars and a null */
211
212 /*
213 * file-function prototypes
214 */
/*
 * Forward declarations of file-local functions so they can be called before
 * their definitions appear (tryit() and parse(), for instance, are defined
 * further down in this file).
 */

/* content-sniffing steps; tryit() calls zmagic(), softmagic() and ascmagic() */
static int ascmagic(request_rec *, unsigned char *, apr_size_t);
static int is_tar(unsigned char *, apr_size_t);
static int softmagic(request_rec *, unsigned char *, apr_size_t);
static int tryit(request_rec *, unsigned char *, apr_size_t, int);
static int zmagic(request_rec *, unsigned char *, apr_size_t);

/* magic-file parsing helpers; parse() handles one line of the magic file */
static int getvalue(server_rec *, struct magic *, char **);
static int hextoint(int);
static char *getstr(server_rec *, char *, char *, int, int *);
static int parse(server_rec *, apr_pool_t *p, char *, int);

/* rule matching helpers -- presumably used by softmagic(); verify there */
static int match(request_rec *, unsigned char *, apr_size_t);
static int mget(request_rec *, union VALUETYPE *, unsigned char *,
                struct magic *, apr_size_t);
static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
static void mprint(request_rec *, union VALUETYPE *, struct magic *);

static int uncompress(request_rec *, int,
                      unsigned char **, apr_size_t);
static long from_oct(int, char *);
/* filesystem-status check; magic_process() calls it on r->filename first */
static int fsmagic(request_rec *r, const char *fn);
236
237 /*
238 * includes for ASCII substring recognition formerly "names.h" in file
239 * command
240 *
241 * Original notes: names and types used by ascmagic in file(1). These tokens are
242 * here because they can appear anywhere in the first HOWMANY bytes, while
243 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
244 * make HOWMANY too high unless you have a very fast CPU.
245 */
246
/* these values are used to index the array 'types' below: keep em in sync! */
/* HTML inserted in first because this is a web server module now */
#define L_HTML   0   /* HTML */
#define L_C      1   /* first and foremost on UNIX */
#define L_FORT   2   /* the oldest one */
#define L_MAKE   3   /* Makefiles */
#define L_PLI    4   /* PL/1 */
#define L_MACH   5   /* some kinda assembler */
#define L_ENG    6   /* English */
#define L_PAS    7   /* Pascal */
#define L_MAIL   8   /* Electronic mail */
#define L_NEWS   9   /* Usenet Netnews */

/*
 * MIME type for each L_* index above.  The entry after L_NEWS has no L_*
 * constant of its own, and the list is terminated by a 0 (null) pointer.
 */
static const char *const types[] =
{
    "text/html",             /* HTML */
    "text/plain",            /* "c program text", */
    "text/plain",            /* "fortran program text", */
    "text/plain",            /* "make commands text", */
    "text/plain",            /* "pl/1 program text", */
    "text/plain",            /* "assembler program text", */
    "text/plain",            /* "English text", */
    "text/plain",            /* "pascal program text", */
    "message/rfc822",        /* "mail text", */
    "message/news",          /* "news text", */
    "application/binary",    /* "can't happen error on names.h/types", */
    0
};
275
276 static const struct names {
277 const char *name;
278 short type;
279 } names[] = {
280
281 /* These must be sorted by eye for optimal hit rate */
282 /* Add to this list only after substantial meditation */
283 {
284 "<html>", L_HTML
285 },
286 {
287 "<HTML>", L_HTML
288 },
289 {
290 "<head>", L_HTML
291 },
292 {
293 "<HEAD>", L_HTML
294 },
295 {
296 "<title>", L_HTML
297 },
298 {
299 "<TITLE>", L_HTML
300 },
301 {
302 "<h1>", L_HTML
303 },
304 {
305 "<H1>", L_HTML
306 },
307 {
308 "<!--", L_HTML
309 },
310 {
311 "<!DOCTYPE HTML", L_HTML
312 },
313 {
314 "/*", L_C
315 }, /* must precede "The", "the", etc. */
316 {
317 "#include", L_C
318 },
319 {
320 "char", L_C
321 },
322 {
323 "The", L_ENG
324 },
325 {
326 "the", L_ENG
327 },
328 {
329 "double", L_C
330 },
331 {
332 "extern", L_C
333 },
334 {
335 "float", L_C
336 },
337 {
338 "real", L_C
339 },
340 {
341 "struct", L_C
342 },
343 {
344 "union", L_C
345 },
346 {
347 "CFLAGS", L_MAKE
348 },
349 {
350 "LDFLAGS", L_MAKE
351 },
352 {
353 "all:", L_MAKE
354 },
355 {
356 ".PRECIOUS", L_MAKE
357 },
358 /*
359 * Too many files of text have these words in them. Find another way to
360 * recognize Fortrash.
361 */
362 #ifdef NOTDEF
363 {
364 "subroutine", L_FORT
365 },
366 {
367 "function", L_FORT
368 },
369 {
370 "block", L_FORT
371 },
372 {
373 "common", L_FORT
374 },
375 {
376 "dimension", L_FORT
377 },
378 {
379 "integer", L_FORT
380 },
381 {
382 "data", L_FORT
383 },
384 #endif /* NOTDEF */
385 {
386 ".ascii", L_MACH
387 },
388 {
389 ".asciiz", L_MACH
390 },
391 {
392 ".byte", L_MACH
393 },
394 {
395 ".even", L_MACH
396 },
397 {
398 ".globl", L_MACH
399 },
400 {
401 "clr", L_MACH
402 },
403 {
404 "(input,", L_PAS
405 },
406 {
407 "dcl", L_PLI
408 },
409 {
410 "Received:", L_MAIL
411 },
412 {
413 ">From", L_MAIL
414 },
415 {
416 "Return-Path:", L_MAIL
417 },
418 {
419 "Cc:", L_MAIL
420 },
421 {
422 "Newsgroups:", L_NEWS
423 },
424 {
425 "Path:", L_NEWS
426 },
427 {
428 "Organization:", L_NEWS
429 },
430 {
431 NULL, 0
432 }
433 };
434
435 #define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
436
437 /*
438 * Result String List (RSL)
439 *
440 * The file(1) command prints its output. Instead, we store the various
441 * "printed" strings in a list (allocating memory as we go) and concatenate
442 * them at the end when we finally know how much space they'll need.
443 */
444
/* one node of the result string list; nodes are appended by magic_rsl_add() */
typedef struct magic_rsl_s {
    const char *str;                /* string, possibly a fragment */
    struct magic_rsl_s *next;       /* pointer to next fragment */
} magic_rsl;
449
450 /*
451 * Apache module configuration structures
452 */
453
/* per-server info */
typedef struct {
    const char *magicfile;    /* where magic be found (set by MimeMagicFile) */
    struct magic *magic;      /* head of magic config list */
    struct magic *last;       /* tail of the list; parse() appends after it */
} magic_server_config_rec;
460
/* per-request info */
typedef struct {
    magic_rsl *head;          /* result string list */
    magic_rsl *tail;          /* last node of the list, for O(1) appends */
} magic_req_rec;
466
467 /*
468 * configuration functions - called by Apache API routines
469 */
470
/*
 * Forward declaration of this module's record; the functions below pass its
 * address to ap_get_module_config() / ap_set_module_config().
 */
module AP_MODULE_DECLARE_DATA mime_magic_module;
472
/*
 * Allocate a fresh per-server configuration record from pool 'p'.
 * The server_rec argument is not used.
 */
static void *create_magic_server_config(apr_pool_t *p, server_rec *d)
{
    magic_server_config_rec *conf;

    /* apr_pcalloc() because the record must start out zeroed */
    conf = apr_pcalloc(p, sizeof(*conf));
    return conf;
}
478
/*
 * Build the merged per-server configuration from a base record and an
 * overriding record.  The magic file name comes from 'addv' when it has one,
 * otherwise from 'basev'.  The rule list is not inherited: both list
 * pointers of the new record start out NULL.
 */
static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv)
{
    magic_server_config_rec *parent = basev;
    magic_server_config_rec *child = addv;
    magic_server_config_rec *merged = apr_palloc(p, sizeof(*merged));

    merged->magic = NULL;
    merged->last = NULL;

    if (child->magicfile) {
        merged->magicfile = child->magicfile;
    }
    else {
        merged->magicfile = parent->magicfile;
    }

    return merged;
}
491
/*
 * Handler for the MimeMagicFile directive: remember the given path in the
 * per-server configuration.  Returns NULL on success or an error string
 * when the server has no configuration record for this module.
 */
static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg)
{
    magic_server_config_rec *conf =
        ap_get_module_config(cmd->server->module_config, &mime_magic_module);

    if (conf) {
        conf->magicfile = arg;
        return NULL;
    }

    return MODNAME ": server structure not allocated";
}
504
505 /*
506 * configuration file commands - exported to Apache API
507 */
508
/* directive table: MimeMagicFile takes one argument, handled by set_magicfile() */
static const command_rec mime_magic_cmds[] =
{
    AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF,
     "Path to MIME Magic file (in file(1) format)"),
    {NULL}      /* end-of-table marker */
};
515
516 /*
517 * RSL (result string list) processing routines
518 *
519 * These collect strings that would have been printed in fragments by file(1)
520 * into a list of magic_rsl structures with the strings. When complete,
521 * they're concatenated together to become the MIME content and encoding
522 * types.
523 *
 * Return value conventions for these functions:
 *   - functions which return int:      failure = -1, other = result
 *   - functions which return pointers: failure = 0,  other = result
527 */
528
529 /* allocate a per-request structure and put it in the request record */
magic_set_config(request_rec * r)530 static magic_req_rec *magic_set_config(request_rec *r)
531 {
532 magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
533 sizeof(magic_req_rec));
534
535 req_dat->head = req_dat->tail = (magic_rsl *) NULL;
536 ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
537 return req_dat;
538 }
539
540 /* add a string to the result string list for this request */
541 /* it is the responsibility of the caller to allocate "str" */
magic_rsl_add(request_rec * r,const char * str)542 static int magic_rsl_add(request_rec *r, const char *str)
543 {
544 magic_req_rec *req_dat = (magic_req_rec *)
545 ap_get_module_config(r->request_config, &mime_magic_module);
546 magic_rsl *rsl;
547
548 /* make sure we have a list to put it in */
549 if (!req_dat) {
550 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, APLOGNO(01507)
551 MODNAME ": request config should not be NULL");
552 if (!(req_dat = magic_set_config(r))) {
553 /* failure */
554 return -1;
555 }
556 }
557
558 /* allocate the list entry */
559 rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
560
561 /* fill it */
562 rsl->str = str;
563 rsl->next = (magic_rsl *) NULL;
564
565 /* append to the list */
566 if (req_dat->head && req_dat->tail) {
567 req_dat->tail->next = rsl;
568 req_dat->tail = rsl;
569 }
570 else {
571 req_dat->head = req_dat->tail = rsl;
572 }
573
574 /* success */
575 return 0;
576 }
577
578 /* RSL hook for puts-type functions */
magic_rsl_puts(request_rec * r,const char * str)579 static int magic_rsl_puts(request_rec *r, const char *str)
580 {
581 return magic_rsl_add(r, str);
582 }
583
584 /* RSL hook for printf-type functions */
magic_rsl_printf(request_rec * r,char * str,...)585 static int magic_rsl_printf(request_rec *r, char *str,...)
586 {
587 va_list ap;
588
589 char buf[MAXMIMESTRING];
590
591 /* assemble the string into the buffer */
592 va_start(ap, str);
593 apr_vsnprintf(buf, sizeof(buf), str, ap);
594 va_end(ap);
595
596 /* add the buffer to the list */
597 return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
598 }
599
600 /* RSL hook for putchar-type functions */
magic_rsl_putchar(request_rec * r,char c)601 static int magic_rsl_putchar(request_rec *r, char c)
602 {
603 char str[2];
604
605 /* high overhead for 1 char - just hope they don't do this much */
606 str[0] = c;
607 str[1] = '\0';
608 return magic_rsl_add(r, apr_pstrdup(r->pool, str));
609 }
610
611 /* allocate and copy a contiguous string from a result string list */
rsl_strdup(request_rec * r,int start_frag,int start_pos,int len)612 static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
613 {
614 char *result; /* return value */
615 int cur_frag, /* current fragment number/counter */
616 cur_pos, /* current position within fragment */
617 res_pos; /* position in result string */
618 magic_rsl *frag; /* list-traversal pointer */
619 magic_req_rec *req_dat = (magic_req_rec *)
620 ap_get_module_config(r->request_config, &mime_magic_module);
621
622 /* allocate the result string */
623 result = (char *) apr_palloc(r->pool, len + 1);
624
625 /* loop through and collect the string */
626 res_pos = 0;
627 for (frag = req_dat->head, cur_frag = 0;
628 frag->next;
629 frag = frag->next, cur_frag++) {
630 /* loop to the first fragment */
631 if (cur_frag < start_frag)
632 continue;
633
634 /* loop through and collect chars */
635 for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
636 frag->str[cur_pos];
637 cur_pos++) {
638 if (cur_frag >= start_frag
639 && cur_pos >= start_pos
640 && res_pos <= len) {
641 result[res_pos++] = frag->str[cur_pos];
642 if (res_pos > len) {
643 break;
644 }
645 }
646 }
647 }
648
649 /* clean up and return */
650 result[res_pos] = 0;
651 #if MIME_MAGIC_DEBUG
652 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01508)
653 MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result);
654 #endif
655 return result;
656 }
657
/*
 * states for the state-machine algorithm in magic_rsl_to_request()
 *
 * The enumerators are listed in the order the parser moves through them;
 * magic_rsl_to_request() advances with "state++", so do not reorder them.
 */
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;
662
/*
 * process the RSL and set the MIME info in the request record
 *
 * Scans the result string list for text of the form
 *     [whitespace] type/subtype [whitespace encoding]
 * recording where the type and the optional encoding start (fragment index
 * and position) and how long they are.  The pieces are then copied with
 * rsl_strdup(), lower-cased, and stored as the request's content type and
 * content encoding.
 *
 * Returns DECLINED when there is no result list, when whitespace is seen
 * inside the type before any '/', or when the scan ends before a subtype
 * was reached; HTTP_INTERNAL_SERVER_ERROR when the content type (or a
 * detected encoding) ends up NULL; OK otherwise.
 */
static int magic_rsl_to_request(request_rec *r)
{
    int cur_frag,         /* current fragment number/counter */
        cur_pos,          /* current position within fragment */
        type_frag,        /* content type starting point: fragment */
        type_pos,         /* content type starting point: position */
        type_len,         /* content type length */
        encoding_frag,    /* content encoding starting point: fragment */
        encoding_pos,     /* content encoding starting point: position */
        encoding_len;     /* content encoding length */

    char *tmp;
    magic_rsl *frag;      /* list-traversal pointer */
    rsl_states state;

    magic_req_rec *req_dat = (magic_req_rec *)
                    ap_get_module_config(r->request_config, &mime_magic_module);

    /* check if we have a result */
    if (!req_dat || !req_dat->head) {
        /* empty - no match, we defer to other Apache modules */
        return DECLINED;
    }

    /* start searching for the type and encoding */
    state = rsl_leading_space;
    type_frag = type_pos = type_len = 0;
    encoding_frag = encoding_pos = encoding_len = 0;
    /* note: the "frag->next" test means the last fragment is never scanned */
    for (frag = req_dat->head, cur_frag = 0;
         frag && frag->next;
         frag = frag->next, cur_frag++) {
        /* loop through the characters in the fragment */
        for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
            if (apr_isspace(frag->str[cur_pos])) {
                /* process whitespace actions for each state */
                if (state == rsl_leading_space) {
                    /* eat whitespace in this state */
                    continue;
                }
                else if (state == rsl_type) {
                    /* whitespace: type has no slash! */
                    return DECLINED;
                }
                else if (state == rsl_subtype) {
                    /* whitespace: end of MIME type */
                    state++;
                    continue;
                }
                else if (state == rsl_separator) {
                    /* eat whitespace in this state */
                    continue;
                }
                else if (state == rsl_encoding) {
                    /* whitespace: end of MIME encoding */
                    /* we're done */
                    /*
                     * jump to the tail so that the outer loop's
                     * "frag = frag->next" yields NULL and the scan ends
                     */
                    frag = req_dat->tail;
                    break;
                }
                else {
                    /* should not be possible */
                    /* abandon malfunctioning module */
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01509)
                                MODNAME ": bad state %d (ws)", state);
                    return DECLINED;
                }
                /* NOTREACHED */
            }
            else if (state == rsl_type &&
                     frag->str[cur_pos] == '/') {
                /* copy the char and go to rsl_subtype state */
                type_len++;
                state++;
            }
            else {
                /* process non-space actions for each state */
                if (state == rsl_leading_space) {
                    /* non-space: begin MIME type */
                    state++;
                    type_frag = cur_frag;
                    type_pos = cur_pos;
                    type_len = 1;
                    continue;
                }
                else if (state == rsl_type ||
                         state == rsl_subtype) {
                    /* non-space: adds to type */
                    type_len++;
                    continue;
                }
                else if (state == rsl_separator) {
                    /* non-space: begin MIME encoding */
                    state++;
                    encoding_frag = cur_frag;
                    encoding_pos = cur_pos;
                    encoding_len = 1;
                    continue;
                }
                else if (state == rsl_encoding) {
                    /* non-space: adds to encoding */
                    encoding_len++;
                    continue;
                }
                else {
                    /* should not be possible */
                    /* abandon malfunctioning module */
                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01510)
                                MODNAME ": bad state %d (ns)", state);
                    return DECLINED;
                }
                /* NOTREACHED */
            }
            /* NOTREACHED */
        }
    }

    /* if we ended prior to state rsl_subtype, we had incomplete info */
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {
        /* defer to other modules */
        return DECLINED;
    }

    /* save the info in the request record */
    tmp = rsl_strdup(r, type_frag, type_pos, type_len);
    /* XXX: this could be done at config time I'm sure... but I'm
     * confused by all this magic_rsl stuff. -djg */
    ap_content_type_tolower(tmp);
    ap_set_content_type(r, tmp);

    /* an encoding is stored only if the scan got as far as reading one */
    if (state == rsl_encoding) {
        tmp = rsl_strdup(r, encoding_frag,
                                         encoding_pos, encoding_len);
        /* XXX: this could be done at config time I'm sure... but I'm
         * confused by all this magic_rsl stuff. -djg */
        ap_str_tolower(tmp);
        r->content_encoding = tmp;
    }

    /* detect memory allocation or other errors */
    if (!r->content_type ||
        (state == rsl_encoding && !r->content_encoding)) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511)
                      MODNAME ": unexpected state %d; could be caused by bad "
                      "data in magic file",
                      state);
        return HTTP_INTERNAL_SERVER_ERROR;
    }

    /* success! */
    return OK;
}
815
816 /*
817 * magic_process - process input file r Apache API request record
818 * (formerly called "process" in file command, prefix added for clarity) Opens
819 * the file and reads a fixed-size buffer to begin processing the contents.
820 */
magic_process(request_rec * r)821 static int magic_process(request_rec *r)
822 {
823 apr_file_t *fd = NULL;
824 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
825 apr_size_t nbytes = 0; /* number of bytes read from a datafile */
826 int result;
827
828 /*
829 * first try judging the file based on its filesystem status
830 */
831 switch ((result = fsmagic(r, r->filename))) {
832 case DONE:
833 magic_rsl_putchar(r, '\n');
834 return OK;
835 case OK:
836 break;
837 default:
838 /* fatal error, bail out */
839 return result;
840 }
841
842 if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
843 /* We can't open it, but we were able to stat it. */
844 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01512)
845 MODNAME ": can't read `%s'", r->filename);
846 /* let some other handler decide what the problem is */
847 return DECLINED;
848 }
849
850 /*
851 * try looking at the first HOWMANY bytes
852 */
853 nbytes = sizeof(buf) - 1;
854 if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) {
855 ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r, APLOGNO(01513)
856 MODNAME ": read failed: %s", r->filename);
857 return HTTP_INTERNAL_SERVER_ERROR;
858 }
859
860 if (nbytes == 0) {
861 return DECLINED;
862 }
863 else {
864 buf[nbytes++] = '\0'; /* null-terminate it */
865 result = tryit(r, buf, nbytes, 1);
866 if (result != OK) {
867 return result;
868 }
869 }
870
871 (void) apr_file_close(fd);
872 (void) magic_rsl_putchar(r, '\n');
873
874 return OK;
875 }
876
877
/*
 * Run the content checks over the first "nb" bytes in "buf", stopping at
 * the first check that reports a hit (a return value of 1):
 *   1. zmagic()    - compression stuff, only when checkzmagic == 1
 *   2. softmagic() - tests from /etc/magic (or surrogate magic file)
 *   3. ascmagic()  - known keywords, plus a check for ascii-ness
 *
 * Returns OK as soon as one check hits, DECLINED if none does.
 */
static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb,
                 int checkzmagic)
{
    if (checkzmagic == 1 && zmagic(r, buf, nb) == 1) {
        return OK;
    }

    /* short-circuit evaluation keeps softmagic() ahead of ascmagic() */
    if (softmagic(r, buf, nb) == 1 || ascmagic(r, buf, nb) == 1) {
        return OK;
    }

    /* abandon hope, all ye who remain here */
    return DECLINED;
}
906
/*
 * EATAB - advance the char pointer 'l' past any whitespace (as classified by
 * apr_isspace()).  The macro expands to a braced block and relies on a
 * variable named 'l' being in scope where it is used.
 */
#define    EATAB {while (apr_isspace(*l))  ++l;}
908
909 /*
910 * apprentice - load configuration from the magic file r
911 * API request record
912 */
apprentice(server_rec * s,apr_pool_t * p)913 static int apprentice(server_rec *s, apr_pool_t *p)
914 {
915 apr_file_t *f = NULL;
916 apr_status_t result;
917 char line[BUFSIZ + 1];
918 int errs = 0;
919 int lineno;
920 #if MIME_MAGIC_DEBUG
921 int rule = 0;
922 struct magic *m, *prevm;
923 #endif
924 magic_server_config_rec *conf = (magic_server_config_rec *)
925 ap_get_module_config(s->module_config, &mime_magic_module);
926 const char *fname = ap_server_root_relative(p, conf->magicfile);
927
928 if (!fname) {
929 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, APLOGNO(01514)
930 MODNAME ": Invalid magic file path %s", conf->magicfile);
931 return -1;
932 }
933 if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED,
934 APR_OS_DEFAULT, p)) != APR_SUCCESS) {
935 ap_log_error(APLOG_MARK, APLOG_ERR, result, s, APLOGNO(01515)
936 MODNAME ": can't read magic file %s", fname);
937 return -1;
938 }
939
940 /* set up the magic list (empty) */
941 conf->magic = conf->last = NULL;
942
943 /* parse it */
944 for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
945 int ws_offset;
946 char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an
947 * "empty" line contains a '\n'
948 */
949
950 /* delete newline and any other trailing whitespace */
951 while (last >= line
952 && apr_isspace(*last)) {
953 *last = '\0';
954 --last;
955 }
956
957 /* skip leading whitespace */
958 ws_offset = 0;
959 while (line[ws_offset] && apr_isspace(line[ws_offset])) {
960 ws_offset++;
961 }
962
963 /* skip blank lines */
964 if (line[ws_offset] == 0) {
965 continue;
966 }
967
968 /* comment, do not parse */
969 if (line[ws_offset] == '#')
970 continue;
971
972 #if MIME_MAGIC_DEBUG
973 /* if we get here, we're going to use it so count it */
974 rule++;
975 #endif
976
977 /* parse it */
978 if (parse(s, p, line + ws_offset, lineno) != 0)
979 ++errs;
980 }
981
982 (void) apr_file_close(f);
983
984 #if MIME_MAGIC_DEBUG
985 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01516)
986 MODNAME ": apprentice conf=%pp file=%s m=%s m->next=%s last=%s",
987 conf,
988 conf->magicfile ? conf->magicfile : "NULL",
989 conf->magic ? "set" : "NULL",
990 (conf->magic && conf->magic->next) ? "set" : "NULL",
991 conf->last ? "set" : "NULL");
992 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01517)
993 MODNAME ": apprentice read %d lines, %d rules, %d errors",
994 lineno, rule, errs);
995 #endif
996
997 #if MIME_MAGIC_DEBUG
998 prevm = 0;
999 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01518)
1000 MODNAME ": apprentice test");
1001 for (m = conf->magic; m; m = m->next) {
1002 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1003 apr_isprint((((unsigned long) m) >> 16) & 255) &&
1004 apr_isprint((((unsigned long) m) >> 8) & 255) &&
1005 apr_isprint(((unsigned long) m) & 255)) {
1006 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01519)
1007 MODNAME ": apprentice: POINTER CLOBBERED! "
1008 "m=\"%c%c%c%c\" line=%d",
1009 (((unsigned long) m) >> 24) & 255,
1010 (((unsigned long) m) >> 16) & 255,
1011 (((unsigned long) m) >> 8) & 255,
1012 ((unsigned long) m) & 255,
1013 prevm ? prevm->lineno : -1);
1014 break;
1015 }
1016 prevm = m;
1017 }
1018 #endif
1019
1020 return (errs ? -1 : 0);
1021 }
1022
1023 /*
1024 * extend the sign bit if the comparison is to be signed
1025 */
signextend(server_rec * s,struct magic * m,unsigned long v)1026 static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
1027 {
1028 if (!(m->flag & UNSIGNED))
1029 switch (m->type) {
1030 /*
1031 * Do not remove the casts below. They are vital. When later
1032 * compared with the data, the sign extension must have happened.
1033 */
1034 case BYTE:
1035 v = (char) v;
1036 break;
1037 case SHORT:
1038 case BESHORT:
1039 case LESHORT:
1040 v = (short) v;
1041 break;
1042 case DATE:
1043 case BEDATE:
1044 case LEDATE:
1045 case LONG:
1046 case BELONG:
1047 case LELONG:
1048 v = (long) v;
1049 break;
1050 case STRING:
1051 break;
1052 default:
1053 ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(01520)
1054 MODNAME ": can't happen: m->type=%d", m->type);
1055 return -1;
1056 }
1057 return v;
1058 }
1059
1060 /*
1061 * parse one line from magic file, put into magic[index++] if valid
1062 */
parse(server_rec * serv,apr_pool_t * p,char * l,int lineno)1063 static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno)
1064 {
1065 struct magic *m;
1066 char *t, *s;
1067 magic_server_config_rec *conf = (magic_server_config_rec *)
1068 ap_get_module_config(serv->module_config, &mime_magic_module);
1069
1070 /* allocate magic structure entry */
1071 m = (struct magic *) apr_pcalloc(p, sizeof(struct magic));
1072
1073 /* append to linked list */
1074 m->next = NULL;
1075 if (!conf->magic || !conf->last) {
1076 conf->magic = conf->last = m;
1077 }
1078 else {
1079 conf->last->next = m;
1080 conf->last = m;
1081 }
1082
1083 /* set values in magic structure */
1084 m->flag = 0;
1085 m->cont_level = 0;
1086 m->lineno = lineno;
1087
1088 while (*l == '>') {
1089 ++l; /* step over */
1090 m->cont_level++;
1091 }
1092
1093 if (m->cont_level != 0 && *l == '(') {
1094 ++l; /* step over */
1095 m->flag |= INDIR;
1096 }
1097
1098 /* get offset, then skip over it */
1099 m->offset = (int) strtol(l, &t, 0);
1100 if (l == t) {
1101 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01521)
1102 MODNAME ": offset %s invalid", l);
1103 }
1104 l = t;
1105
1106 if (m->flag & INDIR) {
1107 m->in.type = LONG;
1108 m->in.offset = 0;
1109 /*
1110 * read [.lbs][+-]nnnnn)
1111 */
1112 if (*l == '.') {
1113 switch (*++l) {
1114 case 'l':
1115 m->in.type = LONG;
1116 break;
1117 case 's':
1118 m->in.type = SHORT;
1119 break;
1120 case 'b':
1121 m->in.type = BYTE;
1122 break;
1123 default:
1124 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01522)
1125 MODNAME ": indirect offset type %c invalid", *l);
1126 break;
1127 }
1128 l++;
1129 }
1130 s = l;
1131 if (*l == '+' || *l == '-')
1132 l++;
1133 if (apr_isdigit((unsigned char) *l)) {
1134 m->in.offset = strtol(l, &t, 0);
1135 if (*s == '-')
1136 m->in.offset = -m->in.offset;
1137 }
1138 else
1139 t = l;
1140 if (*t++ != ')') {
1141 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01523)
1142 MODNAME ": missing ')' in indirect offset");
1143 }
1144 l = t;
1145 }
1146
1147
1148 while (apr_isdigit((unsigned char) *l))
1149 ++l;
1150 EATAB;
1151
1152 #define NBYTE 4
1153 #define NSHORT 5
1154 #define NLONG 4
1155 #define NSTRING 6
1156 #define NDATE 4
1157 #define NBESHORT 7
1158 #define NBELONG 6
1159 #define NBEDATE 6
1160 #define NLESHORT 7
1161 #define NLELONG 6
1162 #define NLEDATE 6
1163
1164 if (*l == 'u') {
1165 ++l;
1166 m->flag |= UNSIGNED;
1167 }
1168
1169 /* get type, skip it */
1170 if (strncmp(l, "byte", NBYTE) == 0) {
1171 m->type = BYTE;
1172 l += NBYTE;
1173 }
1174 else if (strncmp(l, "short", NSHORT) == 0) {
1175 m->type = SHORT;
1176 l += NSHORT;
1177 }
1178 else if (strncmp(l, "long", NLONG) == 0) {
1179 m->type = LONG;
1180 l += NLONG;
1181 }
1182 else if (strncmp(l, "string", NSTRING) == 0) {
1183 m->type = STRING;
1184 l += NSTRING;
1185 }
1186 else if (strncmp(l, "date", NDATE) == 0) {
1187 m->type = DATE;
1188 l += NDATE;
1189 }
1190 else if (strncmp(l, "beshort", NBESHORT) == 0) {
1191 m->type = BESHORT;
1192 l += NBESHORT;
1193 }
1194 else if (strncmp(l, "belong", NBELONG) == 0) {
1195 m->type = BELONG;
1196 l += NBELONG;
1197 }
1198 else if (strncmp(l, "bedate", NBEDATE) == 0) {
1199 m->type = BEDATE;
1200 l += NBEDATE;
1201 }
1202 else if (strncmp(l, "leshort", NLESHORT) == 0) {
1203 m->type = LESHORT;
1204 l += NLESHORT;
1205 }
1206 else if (strncmp(l, "lelong", NLELONG) == 0) {
1207 m->type = LELONG;
1208 l += NLELONG;
1209 }
1210 else if (strncmp(l, "ledate", NLEDATE) == 0) {
1211 m->type = LEDATE;
1212 l += NLEDATE;
1213 }
1214 else {
1215 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01524)
1216 MODNAME ": type %s invalid", l);
1217 return -1;
1218 }
1219 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1220 if (*l == '&') {
1221 ++l;
1222 m->mask = signextend(serv, m, strtol(l, &l, 0));
1223 }
1224 else
1225 m->mask = ~0L;
1226 EATAB;
1227
1228 switch (*l) {
1229 case '>':
1230 case '<':
1231 /* Old-style anding: "0 byte &0x80 dynamically linked" */
1232 case '&':
1233 case '^':
1234 case '=':
1235 m->reln = *l;
1236 ++l;
1237 break;
1238 case '!':
1239 if (m->type != STRING) {
1240 m->reln = *l;
1241 ++l;
1242 break;
1243 }
1244 /* FALL THROUGH */
1245 default:
1246 if (*l == 'x' && apr_isspace(l[1])) {
1247 m->reln = *l;
1248 ++l;
1249 goto GetDesc; /* Bill The Cat */
1250 }
1251 m->reln = '=';
1252 break;
1253 }
1254 EATAB;
1255
1256 if (getvalue(serv, m, &l))
1257 return -1;
1258 /*
1259 * now get last part - the description
1260 */
1261 GetDesc:
1262 EATAB;
1263 if (l[0] == '\b') {
1264 ++l;
1265 m->nospflag = 1;
1266 }
1267 else if ((l[0] == '\\') && (l[1] == 'b')) {
1268 ++l;
1269 ++l;
1270 m->nospflag = 1;
1271 }
1272 else
1273 m->nospflag = 0;
1274 apr_cpystrn(m->desc, l, sizeof(m->desc));
1275
1276 #if MIME_MAGIC_DEBUG
1277 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, APLOGNO(01525)
1278 MODNAME ": parse line=%d m=%pp next=%pp cont=%d desc=%s",
1279 lineno, m, m->next, m->cont_level, m->desc);
1280 #endif /* MIME_MAGIC_DEBUG */
1281
1282 return 0;
1283 }
1284
1285 /*
1286 * Read a numeric value from a pointer, into the value union of a magic
1287 * pointer, according to the magic type. Update the string pointer to point
1288 * just after the number read. Return 0 for success, non-zero for failure.
1289 */
getvalue(server_rec * s,struct magic * m,char ** p)1290 static int getvalue(server_rec *s, struct magic *m, char **p)
1291 {
1292 int slen;
1293
1294 if (m->type == STRING) {
1295 *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
1296 m->vallen = slen;
1297 }
1298 else if (m->reln != 'x')
1299 m->value.l = signextend(s, m, strtol(*p, p, 0));
1300 return 0;
1301 }
1302
1303 /*
1304 * Convert a string containing C character escapes. Stop at an unescaped
1305 * space or tab. Copy the converted version to "p", returning its length in
1306 * *slen. Return updated scan pointer as function result.
1307 */
getstr(server_rec * serv,register char * s,register char * p,int plen,int * slen)1308 static char *getstr(server_rec *serv, register char *s, register char *p,
1309 int plen, int *slen)
1310 {
1311 char *origs = s, *origp = p;
1312 char *pmax = p + plen - 1;
1313 register int c;
1314 register int val;
1315
1316 while ((c = *s++) != '\0') {
1317 if (apr_isspace(c))
1318 break;
1319 if (p >= pmax) {
1320 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01526)
1321 MODNAME ": string too long: %s", origs);
1322 break;
1323 }
1324 if (c == '\\') {
1325 switch (c = *s++) {
1326
1327 case '\0':
1328 goto out;
1329
1330 default:
1331 *p++ = (char) c;
1332 break;
1333
1334 case 'n':
1335 *p++ = '\n';
1336 break;
1337
1338 case 'r':
1339 *p++ = '\r';
1340 break;
1341
1342 case 'b':
1343 *p++ = '\b';
1344 break;
1345
1346 case 't':
1347 *p++ = '\t';
1348 break;
1349
1350 case 'f':
1351 *p++ = '\f';
1352 break;
1353
1354 case 'v':
1355 *p++ = '\v';
1356 break;
1357
1358 /* \ and up to 3 octal digits */
1359 case '0':
1360 case '1':
1361 case '2':
1362 case '3':
1363 case '4':
1364 case '5':
1365 case '6':
1366 case '7':
1367 val = c - '0';
1368 c = *s++; /* try for 2 */
1369 if (c >= '0' && c <= '7') {
1370 val = (val << 3) | (c - '0');
1371 c = *s++; /* try for 3 */
1372 if (c >= '0' && c <= '7')
1373 val = (val << 3) | (c - '0');
1374 else
1375 --s;
1376 }
1377 else
1378 --s;
1379 *p++ = (char) val;
1380 break;
1381
1382 /* \x and up to 3 hex digits */
1383 case 'x':
1384 val = 'x'; /* Default if no digits */
1385 c = hextoint(*s++); /* Get next char */
1386 if (c >= 0) {
1387 val = c;
1388 c = hextoint(*s++);
1389 if (c >= 0) {
1390 val = (val << 4) + c;
1391 c = hextoint(*s++);
1392 if (c >= 0) {
1393 val = (val << 4) + c;
1394 }
1395 else
1396 --s;
1397 }
1398 else
1399 --s;
1400 }
1401 else
1402 --s;
1403 *p++ = (char) val;
1404 break;
1405 }
1406 }
1407 else
1408 *p++ = (char) c;
1409 }
1410 out:
1411 *p = '\0';
1412 *slen = p - origp;
1413 return s;
1414 }
1415
1416
/*
 * hextoint - value of a single hexadecimal digit character.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
 */
static int hextoint(int c)
{
    int digit = -1;

    if (apr_isdigit(c)) {
        digit = c - '0';
    }
    else if (c >= 'a' && c <= 'f') {
        digit = 10 + (c - 'a');
    }
    else if (c >= 'A' && c <= 'F') {
        digit = 10 + (c - 'A');
    }
    return digit;
}
1428
1429
1430 /*
1431 * return DONE to indicate it's been handled
1432 * return OK to indicate it's a regular file still needing handling
1433 * other returns indicate a failure of some sort
1434 */
fsmagic(request_rec * r,const char * fn)1435 static int fsmagic(request_rec *r, const char *fn)
1436 {
1437 switch (r->finfo.filetype) {
1438 case APR_DIR:
1439 magic_rsl_puts(r, DIR_MAGIC_TYPE);
1440 return DONE;
1441 case APR_CHR:
1442 /*
1443 * (void) magic_rsl_printf(r,"character special (%d/%d)",
1444 * major(sb->st_rdev), minor(sb->st_rdev));
1445 */
1446 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1447 return DONE;
1448 case APR_BLK:
1449 /*
1450 * (void) magic_rsl_printf(r,"block special (%d/%d)",
1451 * major(sb->st_rdev), minor(sb->st_rdev));
1452 */
1453 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1454 return DONE;
1455 /* TODO add code to handle V7 MUX and Blit MUX files */
1456 case APR_PIPE:
1457 /*
1458 * magic_rsl_puts(r,"fifo (named pipe)");
1459 */
1460 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1461 return DONE;
1462 case APR_LNK:
1463 /* We used stat(), the only possible reason for this is that the
1464 * symlink is broken.
1465 */
1466 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01527)
1467 MODNAME ": broken symlink (%s)", fn);
1468 return HTTP_INTERNAL_SERVER_ERROR;
1469 case APR_SOCK:
1470 magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1471 return DONE;
1472 case APR_REG:
1473 break;
1474 default:
1475 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01528)
1476 MODNAME ": invalid file type %d.", r->finfo.filetype);
1477 return HTTP_INTERNAL_SERVER_ERROR;
1478 }
1479
1480 /*
1481 * regular file, check next possibility
1482 */
1483 if (r->finfo.size == 0) {
1484 magic_rsl_puts(r, MIME_TEXT_UNKNOWN);
1485 return DONE;
1486 }
1487 return OK;
1488 }
1489
1490 /*
1491 * softmagic - lookup one file in database (already read from /etc/magic by
1492 * apprentice.c). Passed the name and FILE * of one file to be typed.
1493 */
1494 /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */
softmagic(request_rec * r,unsigned char * buf,apr_size_t nbytes)1495 static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1496 {
1497 if (match(r, buf, nbytes))
1498 return 1;
1499
1500 return 0;
1501 }
1502
1503 /*
1504 * Go through the whole list, stopping if you find a match. Process all the
1505 * continuations of that match before returning.
1506 *
1507 * We support multi-level continuations:
1508 *
1509 * At any time when processing a successful top-level match, there is a current
1510 * continuation level; it represents the level of the last successfully
1511 * matched continuation.
1512 *
1513 * Continuations above that level are skipped as, if we see one, it means that
1514 * the continuation that controls them - i.e, the lower-level continuation
1515 * preceding them - failed to match.
1516 *
1517 * Continuations below that level are processed as, if we see one, it means
1518 * we've finished processing or skipping higher-level continuations under the
1519 * control of a successful or unsuccessful lower-level continuation, and are
1520 * now seeing the next lower-level continuation and should process it. The
1521 * current continuation level reverts to the level of the one we're seeing.
1522 *
1523 * Continuations at the current level are processed as, if we see one, there's
1524 * no lower-level continuation that may have failed.
1525 *
1526 * If a continuation matches, we bump the current continuation level so that
1527 * higher-level continuations are processed.
1528 */
match(request_rec * r,unsigned char * s,apr_size_t nbytes)1529 static int match(request_rec *r, unsigned char *s, apr_size_t nbytes)
1530 {
1531 #if MIME_MAGIC_DEBUG
1532 int rule_counter = 0;
1533 #endif
1534 int cont_level = 0;
1535 int need_separator = 0;
1536 union VALUETYPE p;
1537 magic_server_config_rec *conf = (magic_server_config_rec *)
1538 ap_get_module_config(r->server->module_config, &mime_magic_module);
1539 struct magic *m;
1540
1541 #if MIME_MAGIC_DEBUG
1542 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01529)
1543 MODNAME ": match conf=%pp file=%s m=%s m->next=%s last=%s",
1544 conf,
1545 conf->magicfile ? conf->magicfile : "NULL",
1546 conf->magic ? "set" : "NULL",
1547 (conf->magic && conf->magic->next) ? "set" : "NULL",
1548 conf->last ? "set" : "NULL");
1549 #endif
1550
1551 #if MIME_MAGIC_DEBUG
1552 for (m = conf->magic; m; m = m->next) {
1553 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1554 apr_isprint((((unsigned long) m) >> 16) & 255) &&
1555 apr_isprint((((unsigned long) m) >> 8) & 255) &&
1556 apr_isprint(((unsigned long) m) & 255)) {
1557 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01530)
1558 MODNAME ": match: POINTER CLOBBERED! "
1559 "m=\"%c%c%c%c\"",
1560 (((unsigned long) m) >> 24) & 255,
1561 (((unsigned long) m) >> 16) & 255,
1562 (((unsigned long) m) >> 8) & 255,
1563 ((unsigned long) m) & 255);
1564 break;
1565 }
1566 }
1567 #endif
1568
1569 for (m = conf->magic; m; m = m->next) {
1570 #if MIME_MAGIC_DEBUG
1571 rule_counter++;
1572 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01531)
1573 MODNAME ": line=%d desc=%s", m->lineno, m->desc);
1574 #endif
1575
1576 /* check if main entry matches */
1577 if (!mget(r, &p, s, m, nbytes) ||
1578 !mcheck(r, &p, m)) {
1579 struct magic *m_cont;
1580
1581 /*
1582 * main entry didn't match, flush its continuations
1583 */
1584 if (!m->next || (m->next->cont_level == 0)) {
1585 continue;
1586 }
1587
1588 m_cont = m->next;
1589 while (m_cont && (m_cont->cont_level != 0)) {
1590 #if MIME_MAGIC_DEBUG
1591 rule_counter++;
1592 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01532)
1593 MODNAME ": line=%d mc=%pp mc->next=%pp cont=%d desc=%s",
1594 m_cont->lineno, m_cont,
1595 m_cont->next, m_cont->cont_level,
1596 m_cont->desc);
1597 #endif
1598 /*
1599 * this trick allows us to keep *m in sync when the continue
1600 * advances the pointer
1601 */
1602 m = m_cont;
1603 m_cont = m_cont->next;
1604 }
1605 continue;
1606 }
1607
1608 /* if we get here, the main entry rule was a match */
1609 /* this will be the last run through the loop */
1610 #if MIME_MAGIC_DEBUG
1611 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01533)
1612 MODNAME ": rule matched, line=%d type=%d %s",
1613 m->lineno, m->type,
1614 (m->type == STRING) ? m->value.s : "");
1615 #endif
1616
1617 /* print the match */
1618 mprint(r, &p, m);
1619
1620 /*
1621 * If we printed something, we'll need to print a blank before we
1622 * print something else.
1623 */
1624 if (m->desc[0])
1625 need_separator = 1;
1626 /* and any continuations that match */
1627 cont_level++;
1628 /*
1629 * while (m && m->next && m->next->cont_level != 0 && ( m = m->next
1630 * ))
1631 */
1632 m = m->next;
1633 while (m && (m->cont_level != 0)) {
1634 #if MIME_MAGIC_DEBUG
1635 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01534)
1636 MODNAME ": match line=%d cont=%d type=%d %s",
1637 m->lineno, m->cont_level, m->type,
1638 (m->type == STRING) ? m->value.s : "");
1639 #endif
1640 if (cont_level >= m->cont_level) {
1641 if (cont_level > m->cont_level) {
1642 /*
1643 * We're at the end of the level "cont_level"
1644 * continuations.
1645 */
1646 cont_level = m->cont_level;
1647 }
1648 if (mget(r, &p, s, m, nbytes) &&
1649 mcheck(r, &p, m)) {
1650 /*
1651 * This continuation matched. Print its message, with a
1652 * blank before it if the previous item printed and this
1653 * item isn't empty.
1654 */
1655 /* space if previous printed */
1656 if (need_separator
1657 && (m->nospflag == 0)
1658 && (m->desc[0] != '\0')
1659 ) {
1660 (void) magic_rsl_putchar(r, ' ');
1661 need_separator = 0;
1662 }
1663 mprint(r, &p, m);
1664 if (m->desc[0])
1665 need_separator = 1;
1666
1667 /*
1668 * If we see any continuations at a higher level, process
1669 * them.
1670 */
1671 cont_level++;
1672 }
1673 }
1674
1675 /* move to next continuation record */
1676 m = m->next;
1677 }
1678 #if MIME_MAGIC_DEBUG
1679 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01535)
1680 MODNAME ": matched after %d rules", rule_counter);
1681 #endif
1682 return 1; /* all through */
1683 }
1684 #if MIME_MAGIC_DEBUG
1685 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01536)
1686 MODNAME ": failed after %d rules", rule_counter);
1687 #endif
1688 return 0; /* no match at all */
1689 }
1690
/*
 * mprint - append the description of a matched entry to the result list.
 *
 * r: request whose result list receives the text.
 * p: value fetched from the data for this entry.
 * m: matched magic entry; m->desc is used as the format string.
 *
 * Numeric types are sign-extended and masked, then passed to the format as
 * an unsigned long.  Strings pass either the pattern (relation '=') or the
 * data.  Dates pass the apr_ctime() text of the value.  An unknown type is
 * logged and nothing is printed.
 */
static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m)
{
    unsigned long v;
    char time_str[APR_CTIME_LEN];

    switch (m->type) {
    case BYTE:
        v = p->b;
        break;

    case SHORT:
    case BESHORT:
    case LESHORT:
        v = p->h;
        break;

    case LONG:
    case BELONG:
    case LELONG:
        v = p->l;
        break;

    case STRING:
        if (m->reln == '=') {
            (void) magic_rsl_printf(r, m->desc, m->value.s);
        }
        else {
            (void) magic_rsl_printf(r, m->desc, p->s);
        }
        return;

    case DATE:
    case BEDATE:
    case LEDATE:
        /*
         * Convert by value rather than reading the long through a time_t
         * pointer: that reads the wrong number of bytes when time_t and
         * long differ in size.  The cast keeps the value signed.
         */
        apr_ctime(time_str, apr_time_from_sec((long) p->l));
        (void) magic_rsl_printf(r, m->desc, time_str);
        return;

    default:
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01537)
                      MODNAME ": invalid m->type (%d) in mprint().",
                      m->type);
        return;
    }

    v = signextend(r->server, m, v) & m->mask;
    (void) magic_rsl_printf(r, m->desc, (unsigned long) v);
}
1740
1741 /*
1742 * Convert the byte order of the data we are looking at
1743 */
mconvert(request_rec * r,union VALUETYPE * p,struct magic * m)1744 static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m)
1745 {
1746 char *rt;
1747
1748 switch (m->type) {
1749 case BYTE:
1750 case SHORT:
1751 case LONG:
1752 case DATE:
1753 return 1;
1754 case STRING:
1755 /* Null terminate and eat the return */
1756 p->s[sizeof(p->s) - 1] = '\0';
1757 if ((rt = strchr(p->s, '\n')) != NULL)
1758 *rt = '\0';
1759 return 1;
1760 case BESHORT:
1761 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
1762 return 1;
1763 case BELONG:
1764 case BEDATE:
1765 p->l = (long)
1766 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
1767 return 1;
1768 case LESHORT:
1769 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
1770 return 1;
1771 case LELONG:
1772 case LEDATE:
1773 p->l = (long)
1774 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
1775 return 1;
1776 default:
1777 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01538)
1778 MODNAME ": invalid type %d in mconvert().", m->type);
1779 return 0;
1780 }
1781 }
1782
1783
/*
 * mget - fetch the value a magic entry refers to out of the data buffer.
 *
 * r:      request, passed on for error logging.
 * p:      receives sizeof(union VALUETYPE) bytes copied from the data and
 *         converted by mconvert().
 * s:      data buffer.
 * m:      magic entry giving the offset and, with INDIR, the indirect
 *         offset type and addend.
 * nbytes: number of valid bytes in s.
 *
 * Returns 1 on success, 0 if the window would fall outside the buffer or the
 * type is unknown.  A full union is always copied, so offsets within the
 * last sizeof(union VALUETYPE) bytes of the buffer are rejected.
 */
static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s,
                struct magic *m, apr_size_t nbytes)
{
    long offset = m->offset;

    /*
     * Reject negative offsets explicitly and compare without adding to the
     * offset: "offset + sizeof > nbytes" wraps for negative or huge values
     * and would let the memcpy read outside the buffer.
     */
    if (offset < 0 || nbytes < sizeof(union VALUETYPE)
        || (apr_size_t) offset > nbytes - sizeof(union VALUETYPE))
        return 0;

    memcpy(p, s + offset, sizeof(union VALUETYPE));

    if (!mconvert(r, p, m))
        return 0;

    if (m->flag & INDIR) {

        /* the real offset is read from the data itself, plus an addend */
        switch (m->in.type) {
        case BYTE:
            offset = p->b + m->in.offset;
            break;
        case SHORT:
            offset = p->h + m->in.offset;
            break;
        case LONG:
            offset = p->l + m->in.offset;
            break;
        }

        /* this offset comes from file content, so validate it the same way */
        if (offset < 0
            || (apr_size_t) offset > nbytes - sizeof(union VALUETYPE))
            return 0;

        memcpy(p, s + offset, sizeof(union VALUETYPE));

        if (!mconvert(r, p, m))
            return 0;
    }
    return 1;
}
1821
/*
 * mcheck - test a fetched value against a magic entry.
 *
 * r: request, used for logging.
 * p: value fetched from the data by mget().
 * m: magic entry giving the type, relation, mask and reference value.
 *
 * Returns non-zero if the entry matches, 0 otherwise.  An unknown type or
 * relation is logged and treated as "no match".
 */
static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m)
{
    unsigned long want = m->value.l;    /* reference value from the entry */
    unsigned long got;                  /* value taken from the data */
    int matched = 0;

    /* a stored value whose first two bytes are "x\0" matches anything */
    if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01539)
                      MODNAME ": BOINK");
        return 1;
    }

    switch (m->type) {
    case BYTE:
        got = p->b;
        break;

    case SHORT:
    case BESHORT:
    case LESHORT:
        got = p->h;
        break;

    case LONG:
    case BELONG:
    case LELONG:
    case DATE:
    case BEDATE:
    case LEDATE:
        got = p->l;
        break;

    case STRING:
        {
            /*
             * Like strncmp(m->value.s, p->s, m->vallen) but embedded NULs
             * do not stop the comparison.  The result is the difference of
             * the first differing bytes (data minus pattern), compared
             * against zero below.
             */
            const unsigned char *pattern = (unsigned char *) m->value.s;
            const unsigned char *data = (unsigned char *) p->s;
            int count = m->vallen;
            int i;

            want = 0;
            got = 0;
            for (i = 0; i < count; i++) {
                got = data[i] - pattern[i];
                if (got != 0)
                    break;
            }
        }
        break;

    default:
        /* bogosity, pretend that it just wasn't a match */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01540)
                      MODNAME ": invalid type %d in mcheck().", m->type);
        return 0;
    }

    got = signextend(r->server, m, got) & m->mask;

    switch (m->reln) {
    case 'x':
        matched = 1;
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01541)
                      "%lu == *any* = 1", got);
#endif
        break;

    case '!':
        matched = (got != want);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01542)
                      "%lu != %lu = %d", got, want, matched);
#endif
        break;

    case '=':
        matched = (got == want);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01543)
                      "%lu == %lu = %d", got, want, matched);
#endif
        break;

    case '>':
        if (m->flag & UNSIGNED) {
            matched = (got > want);
#if MIME_MAGIC_DEBUG
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01544)
                          "%lu > %lu = %d", got, want, matched);
#endif
        }
        else {
            matched = ((long) got > (long) want);
#if MIME_MAGIC_DEBUG
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01545)
                          "%ld > %ld = %d", got, want, matched);
#endif
        }
        break;

    case '<':
        if (m->flag & UNSIGNED) {
            matched = (got < want);
#if MIME_MAGIC_DEBUG
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01546)
                          "%lu < %lu = %d", got, want, matched);
#endif
        }
        else {
            matched = ((long) got < (long) want);
#if MIME_MAGIC_DEBUG
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01547)
                          "%ld < %ld = %d", got, want, matched);
#endif
        }
        break;

    case '&':
        /* all bits of the reference value must be set */
        matched = ((got & want) == want);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01548)
                      "((%lx & %lx) == %lx) = %d", got, want, want, matched);
#endif
        break;

    case '^':
        /* at least one bit of the reference value must be clear */
        matched = ((got & want) != want);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01549)
                      "((%lx & %lx) != %lx) = %d", got, want, want, matched);
#endif
        break;

    default:
        /* bogosity, pretend it didn't match */
        matched = 0;
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01550)
                      MODNAME ": mcheck: can't happen: invalid relation %d.",
                      m->reln);
        break;
    }

    return matched;
}
1967
1968 /* an optimization over plain strcmp() */
1969 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
1970
ascmagic(request_rec * r,unsigned char * buf,apr_size_t nbytes)1971 static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1972 {
1973 int has_escapes = 0;
1974 unsigned char *s;
1975 char nbuf[SMALL_HOWMANY + 1]; /* one extra for terminating '\0' */
1976 char *token;
1977 const struct names *p;
1978 int small_nbytes;
1979 char *strtok_state;
1980
1981 /* these are easy, do them first */
1982
1983 /*
1984 * for troff, look for . + letter + letter or .\"; this must be done to
1985 * disambiguate tar archives' ./file and other trash from real troff
1986 * input.
1987 */
1988 if (*buf == '.') {
1989 unsigned char *tp = buf + 1;
1990
1991 while (apr_isspace(*tp))
1992 ++tp; /* skip leading whitespace */
1993 if ((apr_isalnum(*tp) || *tp == '\\') &&
1994 (apr_isalnum(*(tp + 1)) || *tp == '"')) {
1995 magic_rsl_puts(r, "application/x-troff");
1996 return 1;
1997 }
1998 }
1999 if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) {
2000 /* Fortran */
2001 magic_rsl_puts(r, "text/plain");
2002 return 1;
2003 }
2004
2005 /* look for tokens from names.h - this is expensive!, so we'll limit
2006 * ourselves to only SMALL_HOWMANY bytes */
2007 small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes;
2008 /* make a copy of the buffer here because apr_strtok() will destroy it */
2009 s = (unsigned char *) memcpy(nbuf, buf, small_nbytes);
2010 s[small_nbytes] = '\0';
2011 has_escapes = (memchr(s, '\033', small_nbytes) != NULL);
2012 while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) {
2013 s = NULL; /* make apr_strtok() keep on tokin' */
2014 for (p = names; p < names + NNAMES; p++) {
2015 if (STREQ(p->name, token)) {
2016 magic_rsl_puts(r, types[p->type]);
2017 if (has_escapes)
2018 magic_rsl_puts(r, " (with escape sequences)");
2019 return 1;
2020 }
2021 }
2022 }
2023
2024 switch (is_tar(buf, nbytes)) {
2025 case 1:
2026 /* V7 tar archive */
2027 magic_rsl_puts(r, "application/x-tar");
2028 return 1;
2029 case 2:
2030 /* POSIX tar archive */
2031 magic_rsl_puts(r, "application/x-tar");
2032 return 1;
2033 }
2034
2035 /* all else fails, but it is ascii... */
2036 return 0;
2037 }
2038
2039
2040 /*
2041 * compress routines: zmagic() - returns 0 if not recognized, uncompresses
2042 * and prints information if recognized uncompress(s, method, old, n, newch)
2043 * - uncompress old into new, using method, return sizeof new
2044 */
2045
2046 static const struct {
2047 const char *magic;
2048 apr_size_t maglen;
2049 const char *argv[3];
2050 int silent;
2051 const char *encoding; /* MUST be lowercase */
2052 } compr[] = {
2053
2054 /* we use gzip here rather than uncompress because we have to pass
2055 * it a full filename -- and uncompress only considers filenames
2056 * ending with .Z
2057 */
2058 {
2059 "\037\235", 2, {
2060 "gzip", "-dcq", NULL
2061 }, 0, "x-compress"
2062 },
2063 {
2064 "\037\213", 2, {
2065 "gzip", "-dcq", NULL
2066 }, 1, "x-gzip"
2067 },
2068 /*
2069 * XXX pcat does not work, cause I don't know how to make it read stdin,
2070 * so we use gzip
2071 */
2072 {
2073 "\037\036", 2, {
2074 "gzip", "-dcq", NULL
2075 }, 0, "x-gzip"
2076 },
2077 };
2078
2079 #define ncompr (sizeof(compr) / sizeof(compr[0]))
2080
/*
 * zmagic - recognise compressed data and type what is inside it.
 *
 * r:      request; on success r->content_encoding is set to the matching
 *         table entry's encoding.
 * buf:    data to inspect.
 * nbytes: number of valid bytes in buf.
 *
 * Returns 0 if the data carries none of the known compression magics, or if
 * typing the uncompressed data fails.  Returns 1 otherwise, including when
 * the decompressor produced nothing.
 */
static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
{
    unsigned char *newbuf;
    int newsize;
    int i = 0;

    /* find the first table entry whose magic prefixes the buffer */
    while (i < ncompr
           && (nbytes < compr[i].maglen
               || memcmp(buf, compr[i].magic, compr[i].maglen) != 0)) {
        i++;
    }
    if (i == ncompr)
        return 0;

    newsize = uncompress(r, i, &newbuf, HOWMANY);
    if (newsize <= 0)
        return 1;

    /* set encoding type in the request record */
    r->content_encoding = compr[i].encoding;

    /* null-terminate uncompressed data (overwrites its last byte) */
    newbuf[newsize - 1] = '\0';

    /* Try to detect the content type of the uncompressed data */
    return (tryit(r, newbuf, newsize, 0) != OK) ? 0 : 1;
}
2109
2110
2111 struct uncompress_parms {
2112 request_rec *r;
2113 int method;
2114 };
2115
/*
 * create_uncompress_child - start the decompressor for r->filename.
 *
 * parm:    request and compr[] index to use.
 * cntxt:   pool that owns the process attributes and the child process.
 * pipe_in: on success receives the pipe connected to the child's stdout.
 *
 * The child runs in the directory of r->filename with the file name as its
 * last argument.  Returns APR_SUCCESS on success, 1 if the process
 * attributes could not be set up, or the apr_proc_create() status if the
 * program could not be started.
 */
static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt,
                                   apr_file_t **pipe_in)
{
    request_rec *r = parm->r;
    apr_pool_t *child_context = cntxt;
    apr_procattr_t *procattr;
    apr_proc_t *procnew;
    const char *new_argv[4];
    int rc;

    /* XXX missing 1.3 logic:
     *
     * what happens when !compr[parm->method].silent?
     * Should we create the err pipe, read it, and copy to the log?
     */

    if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) ||
        (apr_procattr_io_set(procattr, APR_FULL_BLOCK,
                           APR_FULL_BLOCK, APR_NO_PIPE)   != APR_SUCCESS) ||
        (apr_procattr_dir_set(procattr,
                              ap_make_dirstr_parent(r->pool, r->filename)) != APR_SUCCESS) ||
        (apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH) != APR_SUCCESS)) {
        /* Something bad happened, tell the world. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01551)
               "couldn't setup child process: %s", r->filename);
        return 1;
    }

    /* command, option, the file to decompress, terminator */
    new_argv[0] = compr[parm->method].argv[0];
    new_argv[1] = compr[parm->method].argv[1];
    new_argv[2] = r->filename;
    new_argv[3] = NULL;

    procnew = apr_pcalloc(child_context, sizeof(*procnew));
    rc = apr_proc_create(procnew, compr[parm->method].argv[0],
                         new_argv, NULL, procattr, child_context);
    if (rc != APR_SUCCESS) {
        /* Bad things happened. Everyone should have cleaned up. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01552)
                      MODNAME ": could not execute `%s'.",
                      compr[parm->method].argv[0]);
        return rc;
    }

    /* tie the child to the pool and hand its stdout to the caller */
    apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT);
    *pipe_in = procnew->out;

    return rc;
}
2166
/*
 * uncompress - run the decompressor and read the start of its output.
 *
 * r:      request whose file is decompressed.
 * method: index into compr[] selecting the decompressor.
 * newch:  receives a buffer of n bytes allocated from r->pool.
 * n:      maximum number of bytes to read.
 *
 * Returns the number of bytes read, or -1 if the sub-pool could not be
 * created, the child could not be started, or nothing could be read.  The
 * sub-pool is destroyed on every path once it exists.
 */
static int uncompress(request_rec *r, int method,
                      unsigned char **newch, apr_size_t n)
{
    struct uncompress_parms parm;
    apr_file_t *pipe_out = NULL;
    apr_pool_t *sub_context;
    apr_status_t rv;

    parm.r = r;
    parm.method = method;

    /* We make a sub_pool so that we can collect our child early, otherwise
     * there are cases (i.e. generating directory indices with mod_autoindex)
     * where we would end up with LOTS of zombies.
     */
    if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS)
        return -1;
    apr_pool_tag(sub_context, "magic_uncompress");

    if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01553)
                    MODNAME ": couldn't spawn uncompress process: %s", r->uri);
        /* release the sub-pool here too instead of leaving it on r->pool */
        apr_pool_destroy(sub_context);
        return -1;
    }

    *newch = (unsigned char *) apr_palloc(r->pool, n);
    rv = apr_file_read(pipe_out, *newch, &n);
    if (n == 0) {
        apr_pool_destroy(sub_context);
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01554)
            MODNAME ": read failed from uncompress of %s", r->filename);
        return -1;
    }
    apr_pool_destroy(sub_context);
    return n;
}
2203
2204 /*
2205 * is_tar() -- figure out whether file is a tar archive.
2206 *
2207 * Stolen (by author of file utility) from the public domain tar program: Public
2208 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
2209 *
2210 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
2211 * 1997/06/24 00:41:02 ikluft Exp ikluft $
2212 *
2213 * Comments changed and some code/comments reformatted for file command by Ian
2214 * Darwin.
2215 */
2216
2217 #define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7'))
2218
2219 /*
2220 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
2221 * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
2222 */
2223
is_tar(unsigned char * buf,apr_size_t nbytes)2224 static int is_tar(unsigned char *buf, apr_size_t nbytes)
2225 {
2226 register union record *header = (union record *) buf;
2227 register int i;
2228 register long sum, recsum;
2229 register char *p;
2230
2231 if (nbytes < sizeof(union record))
2232 return 0;
2233
2234 recsum = from_oct(8, header->header.chksum);
2235
2236 sum = 0;
2237 p = header->charptr;
2238 for (i = sizeof(union record); --i >= 0;) {
2239 /*
2240 * We can't use unsigned char here because of old compilers, e.g. V7.
2241 */
2242 sum += 0xFF & *p++;
2243 }
2244
2245 /* Adjust checksum to count the "chksum" field as blanks. */
2246 for (i = sizeof(header->header.chksum); --i >= 0;)
2247 sum -= 0xFF & header->header.chksum[i];
2248 sum += ' ' * sizeof header->header.chksum;
2249
2250 if (sum != recsum)
2251 return 0; /* Not a tar archive */
2252
2253 if (0 == strcmp(header->header.magic, TMAGIC))
2254 return 2; /* Unix Standard tar archive */
2255
2256 return 1; /* Old fashioned tar archive */
2257 }
2258
2259
2260 /*
2261 * Quick and dirty octal conversion.
2262 *
2263 * Result is -1 if the field is invalid (all blank, or nonoctal).
2264 */
static long from_oct(int digs, char *where)
{
    const char *cur = where;    /* read position within the field */
    int left = digs;            /* characters of the field not yet consumed */
    long result = 0;

    /* Step over leading white space; a field of nothing else is invalid. */
    for (; apr_isspace(*cur); cur++) {
        if (--left <= 0) {
            return -1;          /* All blank field */
        }
    }

    /* Accumulate octal digits, three bits apiece, until one fails to match. */
    for (; left > 0 && isodigit(*cur); cur++, left--) {
        result = (result << 3) | (*cur - '0');
    }

    /*
     * If we stopped inside the field, the stopping character must be a
     * terminator (NUL or white space); anything else makes the field bad.
     */
    if (left > 0 && *cur != '\0' && !apr_isspace(*cur)) {
        return -1;              /* Ended on non-space/nul */
    }

    return result;
}
2285
2286 /*
2287 * Check for file-revision suffix
2288 *
2289 * This is for an obscure document control system used on an intranet.
2290 * The web representation of each file's revision has an @1, @2, etc
2291 * appended with the revision number. This needs to be stripped off to
2292 * find the file suffix, which can be recognized by sending the name back
2293 * through a sub-request. The base file name (without the @num suffix)
2294 * must exist because its type will be used as the result.
2295 */
/*
 * revision_suffix() -- type a "<name>@<digits>" file from "<name>".
 *
 * r: the request; r->filename is the name that is examined.
 *
 * Returns 1 if r->filename ends in '@' followed by one or more digits AND
 * the sub-request for the name without that suffix came back with a content
 * type; in that case the content type (and any content encoding and content
 * languages) have been copied into r.  Returns 0 in every other case,
 * including an empty file name.
 */
static int revision_suffix(request_rec *r)
{
    size_t len, at_pos;
    int result = 0;
    char *sub_filename;
    request_rec *sub;

#if MIME_MAGIC_DEBUG
    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01555)
                MODNAME ": revision_suffix checking %s", r->filename);
#endif /* MIME_MAGIC_DEBUG */

    /*
     * An empty name has no last character to look at; return before
     * indexing so that r->filename[-1] is never read.
     */
    len = strlen(r->filename);
    if (len == 0) {
        return 0;
    }

    /* check for recognized revision suffix: walk back over trailing digits */
    at_pos = len;
    while (at_pos > 0 && apr_isdigit(r->filename[at_pos - 1])) {
        at_pos--;
    }

    /*
     * Reject the name unless there was at least one digit (at_pos moved),
     * something precedes the digits (at_pos != 0), and that something is '@'.
     */
    if (at_pos == len || at_pos == 0 || r->filename[at_pos - 1] != '@') {
        return 0;
    }
    at_pos--;                   /* index of '@' == length of the base name */

    /* perform sub-request for the file name without the suffix */
    sub_filename = apr_pstrndup(r->pool, r->filename, at_pos);
#if MIME_MAGIC_DEBUG
    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01556)
                MODNAME ": subrequest lookup for %s", sub_filename);
#endif /* MIME_MAGIC_DEBUG */
    sub = ap_sub_req_lookup_file(sub_filename, r, NULL);

    /*
     * extract content type/encoding/language from sub-request; everything is
     * duplicated into r->pool because the sub-request is destroyed below
     */
    if (sub->content_type) {
        ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type));
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01557)
                    MODNAME ": subrequest %s got %s",
                    sub_filename, r->content_type);
#endif /* MIME_MAGIC_DEBUG */
        if (sub->content_encoding) {
            r->content_encoding =
                apr_pstrdup(r->pool, sub->content_encoding);
        }
        if (sub->content_languages) {
            int n;

            /* copy the array itself, then each string it points to */
            r->content_languages = apr_array_copy(r->pool,
                                                  sub->content_languages);
            for (n = 0; n < r->content_languages->nelts; ++n) {
                char **lang = ((char **)r->content_languages->elts) + n;
                *lang = apr_pstrdup(r->pool, *lang);
            }
        }
        result = 1;
    }

    /* clean up */
    ap_destroy_sub_req(sub);

    return result;
}
2355
2356 /*
2357 * initialize the module
2358 */
/*
 * magic_init() -- post_config hook: set up the magic data of every server.
 *
 * p:            pool passed on to apprentice()
 * plog, ptemp:  not used by this function
 * main_server:  first entry of the server list, walked through ->next
 *
 * A server other than main_server that names no magic file gets a copy of
 * main_server's module configuration.  A server that does name a magic file
 * has apprentice() called for it.  main_server is visited first, so its
 * configuration has already been processed when later servers copy it.
 *
 * Always returns OK.  If apprentice() returns -1 the walk stops right there
 * and the servers not yet visited are left as they were.
 */
static int magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
                      server_rec *main_server)
{
    int result;
    magic_server_config_rec *conf;
    magic_server_config_rec *main_conf;
    server_rec *s;
#if MIME_MAGIC_DEBUG
    struct magic *m, *prevm;
#endif /* MIME_MAGIC_DEBUG */

    main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module);
    for (s = main_server; s; s = s->next) {
        conf = ap_get_module_config(s->module_config, &mime_magic_module);
        if (conf->magicfile == NULL && s != main_server) {
            /* inherits from the parent */
            *conf = *main_conf;
        }
        else if (conf->magicfile) {
            result = apprentice(s, p);
            if (result == -1)
                return OK;
#if MIME_MAGIC_DEBUG
            /*
             * Sanity check of the list just built: a "next" pointer whose
             * low four bytes are all printable characters is reported as
             * having been overwritten with text.
             */
            prevm = 0;
            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01558)
                        MODNAME ": magic_init 1 test");
            for (m = conf->magic; m; m = m->next) {
                unsigned long addr = (unsigned long) m;
                /* %c consumes an int, so hand it ints, not unsigned longs */
                int b3 = (int) ((addr >> 24) & 255);
                int b2 = (int) ((addr >> 16) & 255);
                int b1 = (int) ((addr >> 8) & 255);
                int b0 = (int) (addr & 255);

                if (apr_isprint(b3) &&
                    apr_isprint(b2) &&
                    apr_isprint(b1) &&
                    apr_isprint(b0)) {
                    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01559)
                                MODNAME ": magic_init 1: POINTER CLOBBERED! "
                                "m=\"%c%c%c%c\" line=%d",
                                b3, b2, b1, b0,
                                prevm ? prevm->lineno : -1);
                    break;
                }
                prevm = m;
            }
#endif
        }
    }
    return OK;
}
2406
2407 /*
2408 * Find the Content-Type from any resource this module has available
2409 */
2410
static int magic_find_ct(request_rec *r)
{
    magic_server_config_rec *conf;
    int rc;

    /*
     * Not ours to handle when there is no existing, named file to look at,
     * or when a content type has already been assigned.
     */
    if (!r->filename || r->finfo.filetype == APR_NOFILE || r->content_type) {
        return DECLINED;
    }

    /* Without magic data for this server there is nothing to match. */
    conf = ap_get_module_config(r->server->module_config, &mime_magic_module);
    if (!conf || !conf->magic) {
        return DECLINED;
    }

    /* Set up the per-request state; failure here is a server error. */
    if (!magic_set_config(r)) {
        return HTTP_INTERNAL_SERVER_ERROR;
    }

    /*
     * First try the file-revision suffix shortcut.  Only when it does not
     * succeed is the file examined by content, and a failure of that
     * examination is passed straight back.
     */
    if (revision_suffix(r) != 1) {
        rc = magic_process(r);
        if (rc != OK) {
            return rc;
        }
    }

    /* Transfer whatever results were gathered into the request. */
    return magic_rsl_to_request(r);
}
2447
static void register_hooks(apr_pool_t *p)
{
    /*
     * Predecessor list for the type checker hook: mod_mime is to run
     * before this module does, if this module runs at all.
     */
    static const char * const run_after[] = {
        "mod_mime.c",
        NULL
    };

    ap_hook_type_checker(magic_find_ct, run_after, NULL, APR_HOOK_MIDDLE);

    /* The magic data is set up from the post_config hook. */
    ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST);
}
2457
2458 /*
2459 * Apache API module interface
2460 */
2461
/*
 * Module record for mime_magic.  Only the server-level configuration slots,
 * the command table and the hook registration function are filled in; both
 * per-directory slots are left NULL.
 */
AP_DECLARE_MODULE(mime_magic) =
{
    STANDARD20_MODULE_STUFF,
    NULL,                       /* dir config creator (none) */
    NULL,                       /* dir merger --- default is to override */
    create_magic_server_config, /* server config creator */
    merge_magic_server_config,  /* server config merger */
    mime_magic_cmds,            /* command table */
    register_hooks              /* register hooks */
};
2472