1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * mod_mime_magic: MIME type lookup via file magic numbers
19  * Copyright (c) 1996-1997 Cisco Systems, Inc.
20  *
21  * This software was submitted by Cisco Systems to the Apache Software Foundation in July
22  * 1997.  Future revisions and derivatives of this source code must
23  * acknowledge Cisco Systems as the original contributor of this module.
24  * All other licensing and usage conditions are those of the Apache Software Foundation.
25  *
26  * Some of this code is derived from the free version of the file command
27  * originally posted to comp.sources.unix.  Copyright info for that program
28  * is included below as required.
29  * ---------------------------------------------------------------------------
30  * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
31  *
32  * This software is not subject to any license of the American Telephone and
33  * Telegraph Company or of the Regents of the University of California.
34  *
35  * Permission is granted to anyone to use this software for any purpose on any
36  * computer system, and to alter it and redistribute it freely, subject to
37  * the following restrictions:
38  *
39  * 1. The author is not responsible for the consequences of use of this
40  * software, no matter how awful, even if they arise from flaws in it.
41  *
42  * 2. The origin of this software must not be misrepresented, either by
43  * explicit claim or by omission.  Since few users ever read sources, credits
44  * must appear in the documentation.
45  *
46  * 3. Altered versions must be plainly marked as such, and must not be
47  * misrepresented as being the original software.  Since few users ever read
48  * sources, credits must appear in the documentation.
49  *
50  * 4. This notice may not be removed or altered.
51  * -------------------------------------------------------------------------
52  *
53  * For compliance with Mr Darwin's terms: this has been very significantly
54  * modified from the free "file" command.
55  * - all-in-one file for compilation convenience when moving from one
56  *   version of Apache to the next.
57  * - Memory allocation is done through the Apache API's apr_pool_t structure.
58  * - All functions have had necessary Apache API request or server
59  *   structures passed to them where necessary to call other Apache API
60  *   routines.  (i.e. usually for logging, files, or memory allocation in
61  *   itself or a called function.)
62  * - struct magic has been converted from an array to a single-ended linked
63  *   list because it only grows one record at a time, it's only accessed
64  *   sequentially, and the Apache API has no equivalent of realloc().
65  * - Functions have been changed to get their parameters from the server
66  *   configuration instead of globals.  (It should be reentrant now but has
67  *   not been tested in a threaded environment.)
68  * - Places where it used to print results to stdout now saves them in a
69  *   list where they're used to set the MIME type in the Apache request
70  *   record.
71  * - Command-line flags have been removed since they will never be used here.
72  *
73  * Ian Kluft <ikluft@cisco.com>
74  * Engineering Information Framework
75  * Central Engineering
76  * Cisco Systems, Inc.
77  * San Jose, CA, USA
78  *
79  * Initial installation          July/August 1996
80  * Misc bug fixes                May 1997
81  * Submission to Apache Software Foundation    July 1997
82  *
83  */
84 
85 #include "apr.h"
86 #include "apr_strings.h"
87 #include "apr_lib.h"
88 #define APR_WANT_STRFUNC
89 #include "apr_want.h"
90 
91 #if APR_HAVE_UNISTD_H
92 #include <unistd.h>
93 #endif
94 
95 #include "ap_config.h"
96 #include "httpd.h"
97 #include "http_config.h"
98 #include "http_request.h"
99 #include "http_core.h"
100 #include "http_log.h"
101 #include "http_protocol.h"
102 #include "util_script.h"
103 
104 /* ### this isn't set by configure? does anybody set this? */
105 #ifdef HAVE_UTIME_H
106 #include <utime.h>
107 #endif
108 
109 /*
110  * data structures and related constants
111  */
112 
113 #define MODNAME        "mod_mime_magic"
114 #define MIME_MAGIC_DEBUG        0
115 
116 #define MIME_BINARY_UNKNOWN    "application/octet-stream"
117 #define MIME_TEXT_UNKNOWN    "text/plain"
118 
119 #define MAXMIMESTRING        256
120 
121 /* HOWMANY must be at least 4096 to make gzip -dcq work */
122 #define HOWMANY  4096
123 /* SMALL_HOWMANY limits how much work we do to figure out text files */
124 #define SMALL_HOWMANY 1024
#define MAXDESC    50   /* max length of text description */
#define MAXstring 64    /* max length of "string" types */

/* bits stored in the "flag" member of struct magic */
#define INDIR  1            /* if '>(...)' appears,  */
#define UNSIGNED 2          /* comparison is unsigned */

/* codes stored in the "type" member of struct magic */
#define BYTE      1
#define SHORT     2
#define LONG      4
#define STRING    5
#define DATE      6
#define BESHORT   7
#define BELONG    8
#define BEDATE    9
#define LESHORT  10
#define LELONG   11
#define LEDATE   12

/*
 * One entry read from the magic file.  Entries are chained through "next"
 * into a singly linked list.
 */
struct magic {
    struct magic *next;     /* link to next entry */
    int lineno;             /* line number from magic file */

    short flag;             /* INDIR / UNSIGNED bits */
    short cont_level;       /* level of ">" */
    struct {
        char type;          /* byte short long */
        long offset;        /* offset from indirection */
    } in;
    long offset;            /* offset to magic number */
    unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
    char type;              /* int, short, long or string (BYTE ... LEDATE) */
    char vallen;            /* length of string value, if any */
    union VALUETYPE {
        unsigned char b;
        unsigned short h;
        unsigned long l;
        char s[MAXstring];
        unsigned char hs[2];   /* 2 bytes of a fixed-endian "short" */
        unsigned char hl[4];   /* 4 bytes of a fixed-endian "long" */
    } value;                   /* either number or string */
    unsigned long mask;        /* mask before comparison with value */
    char nospflag;             /* suppress space character */

    /* NOTE: this string is suspected of overrunning - find it! */
    char desc[MAXDESC];        /* description */
};
169 
170 /*
171  * data structures for tar file recognition
172  * --------------------------------------------------------------------------
173  * Header file for public domain tar (tape archive) program.
174  *
175  * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
176  * Gilmore, ihnp4!hoptoad!gnu.
177  *
178  * Header block on tape.
179  *
180  * I'm going to use traditional DP naming conventions here. A "block" is a big
181  * chunk of stuff that we do I/O on. A "record" is a piece of info that we
182  * care about. Typically many "record"s fit into a "block".
183  */
#define RECORDSIZE    512   /* size of one tar record, in bytes */
#define NAMSIZ    100       /* size of the name and linkname fields */
#define TUNMLEN    32       /* size of the uname field */
#define TGNMLEN    32       /* size of the gname field */

/* The named fields of a tar header record; every field is a char array. */
struct header {
    char name[NAMSIZ];
    char mode[8];
    char uid[8];
    char gid[8];
    char size[12];
    char mtime[12];
    char chksum[8];
    char linkflag;
    char linkname[NAMSIZ];
    char magic[8];
    char uname[TUNMLEN];
    char gname[TGNMLEN];
    char devmajor[8];
    char devminor[8];
};

/*
 * A tar record, viewable either as RECORDSIZE raw bytes (charptr) or as
 * the structured header overlaid on those same bytes.
 */
union record {
    char charptr[RECORDSIZE];
    struct header header;
};
208 
209 /* The magic field is filled with this if uname and gname are valid. */
210 #define    TMAGIC        "ustar  "   /* 7 chars and a null */
211 
212 /*
213  * file-function prototypes
214  */
215 static int ascmagic(request_rec *, unsigned char *, apr_size_t);
216 static int is_tar(unsigned char *, apr_size_t);
217 static int softmagic(request_rec *, unsigned char *, apr_size_t);
218 static int tryit(request_rec *, unsigned char *, apr_size_t, int);
219 static int zmagic(request_rec *, unsigned char *, apr_size_t);
220 
221 static int getvalue(server_rec *, struct magic *, char **);
222 static int hextoint(int);
223 static char *getstr(server_rec *, char *, char *, int, int *);
224 static int parse(server_rec *, apr_pool_t *p, char *, int);
225 
226 static int match(request_rec *, unsigned char *, apr_size_t);
227 static int mget(request_rec *, union VALUETYPE *, unsigned char *,
228                 struct magic *, apr_size_t);
229 static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
230 static void mprint(request_rec *, union VALUETYPE *, struct magic *);
231 
232 static int uncompress(request_rec *, int,
233                       unsigned char **, apr_size_t);
234 static long from_oct(int, char *);
235 static int fsmagic(request_rec *r, const char *fn);
236 
237 /*
238  * includes for ASCII substring recognition formerly "names.h" in file
239  * command
240  *
241  * Original notes: names and types used by ascmagic in file(1). These tokens are
242  * here because they can appear anywhere in the first HOWMANY bytes, while
243  * tokens in /etc/magic must appear at fixed offsets into the file. Don't
244  * make HOWMANY too high unless you have a very fast CPU.
245  */
246 
/*
 * These L_* values are indices into the types[] array below: keep the two
 * in sync!  HTML comes first because this is a web server module now.
 */
#define L_HTML    0   /* HTML */
#define L_C       1   /* first and foremost on UNIX */
#define L_FORT    2   /* the oldest one */
#define L_MAKE    3   /* Makefiles */
#define L_PLI     4   /* PL/1 */
#define L_MACH    5   /* some kinda assembler */
#define L_ENG     6   /* English */
#define L_PAS     7   /* Pascal */
#define L_MAIL    8   /* Electronic mail */
#define L_NEWS    9   /* Usenet Netnews */

/* MIME type for each L_* index, followed by a fallback and a NULL end mark */
static const char *const types[] = {
    "text/html",             /* L_HTML: HTML */
    "text/plain",            /* L_C:    "c program text" */
    "text/plain",            /* L_FORT: "fortran program text" */
    "text/plain",            /* L_MAKE: "make commands text" */
    "text/plain",            /* L_PLI:  "pl/1 program text" */
    "text/plain",            /* L_MACH: "assembler program text" */
    "text/plain",            /* L_ENG:  "English text" */
    "text/plain",            /* L_PAS:  "pascal program text" */
    "message/rfc822",        /* L_MAIL: "mail text" */
    "message/news",          /* L_NEWS: "news text" */
    "application/binary",    /* "can't happen error on names.h/types" */
    NULL
};

/*
 * Keyword table: each entry pairs a token with the L_* index of the kind
 * of text it suggests.  The table ends with a { NULL, 0 } entry.
 */
static const struct names {
    const char *name;
    short type;
} names[] = {
    /* These must be sorted by eye for optimal hit rate */
    /* Add to this list only after substantial meditation */
    { "<html>", L_HTML },
    { "<HTML>", L_HTML },
    { "<head>", L_HTML },
    { "<HEAD>", L_HTML },
    { "<title>", L_HTML },
    { "<TITLE>", L_HTML },
    { "<h1>", L_HTML },
    { "<H1>", L_HTML },
    { "<!--", L_HTML },
    { "<!DOCTYPE HTML", L_HTML },
    { "/*", L_C },               /* must precede "The", "the", etc. */
    { "#include", L_C },
    { "char", L_C },
    { "The", L_ENG },
    { "the", L_ENG },
    { "double", L_C },
    { "extern", L_C },
    { "float", L_C },
    { "real", L_C },
    { "struct", L_C },
    { "union", L_C },
    { "CFLAGS", L_MAKE },
    { "LDFLAGS", L_MAKE },
    { "all:", L_MAKE },
    { ".PRECIOUS", L_MAKE },
    /*
     * Too many files of text have these words in them.  Find another way to
     * recognize Fortrash.
     */
#ifdef NOTDEF
    { "subroutine", L_FORT },
    { "function", L_FORT },
    { "block", L_FORT },
    { "common", L_FORT },
    { "dimension", L_FORT },
    { "integer", L_FORT },
    { "data", L_FORT },
#endif /* NOTDEF */
    { ".ascii", L_MACH },
    { ".asciiz", L_MACH },
    { ".byte", L_MACH },
    { ".even", L_MACH },
    { ".globl", L_MACH },
    { "clr", L_MACH },
    { "(input,", L_PAS },
    { "dcl", L_PLI },
    { "Received:", L_MAIL },
    { ">From", L_MAIL },
    { "Return-Path:", L_MAIL },
    { "Cc:", L_MAIL },
    { "Newsgroups:", L_NEWS },
    { "Path:", L_NEWS },
    { "Organization:", L_NEWS },
    { NULL, 0 }
};

/* number of real entries in names[], not counting the NULL end mark */
#define NNAMES ((sizeof(names) / sizeof(names[0])) - 1)
436 
437 /*
438  * Result String List (RSL)
439  *
440  * The file(1) command prints its output.  Instead, we store the various
441  * "printed" strings in a list (allocating memory as we go) and concatenate
442  * them at the end when we finally know how much space they'll need.
443  */
444 
/* One node of the result string list; nodes are chained through "next". */
struct magic_rsl_s {
    const char *str;            /* string, possibly a fragment */
    struct magic_rsl_s *next;   /* pointer to next fragment */
};

typedef struct magic_rsl_s magic_rsl;
449 
450 /*
451  * Apache module configuration structures
452  */
453 
/* per-server info: one of these is kept for each server's configuration */
typedef struct {
    const char *magicfile;    /* where magic be found (argument of MimeMagicFile) */
    struct magic *magic;      /* head of magic config list */
    struct magic *last;       /* reset to NULL together with "magic"; presumably
                               * the tail of that list - TODO confirm */
} magic_server_config_rec;
460 
/* per-request info: stored in the request's config for this module */
typedef struct {
    magic_rsl *head;          /* result string list: first fragment */
    magic_rsl *tail;          /* last fragment; new fragments are appended here */
} magic_req_rec;
466 
467 /*
468  * configuration functions - called by Apache API routines
469  */
470 
471 module AP_MODULE_DECLARE_DATA mime_magic_module;
472 
/*
 * Create the per-server configuration record.
 * The record is allocated from pool p and starts out zero-filled; the
 * server_rec argument is not used.
 */
static void *create_magic_server_config(apr_pool_t *p, server_rec *d)
{
    magic_server_config_rec *conf = apr_pcalloc(p, sizeof(*conf));

    return conf;
}
478 
/*
 * Merge two per-server configuration records into a new one allocated
 * from pool p.  The magic file name comes from addv when it has one and
 * from basev otherwise; the merged record starts with an empty magic list.
 */
static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv)
{
    const magic_server_config_rec *parent = basev;
    const magic_server_config_rec *child = addv;
    magic_server_config_rec *merged = apr_palloc(p, sizeof(*merged));

    /* the magic list is never inherited */
    merged->magic = NULL;
    merged->last = NULL;

    if (child->magicfile) {
        merged->magicfile = child->magicfile;
    }
    else {
        merged->magicfile = parent->magicfile;
    }

    return merged;
}
491 
/*
 * Directive handler: remember arg as the magic file name in this server's
 * configuration record.  Returns NULL on success, or an error message when
 * the server has no configuration record for this module.
 */
static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg)
{
    magic_server_config_rec *conf =
        ap_get_module_config(cmd->server->module_config, &mime_magic_module);

    if (conf != NULL) {
        conf->magicfile = arg;
        return NULL;
    }

    return MODNAME ": server structure not allocated";
}
504 
505 /*
506  * configuration file commands - exported to Apache API
507  */
508 
static const command_rec mime_magic_cmds[] =
{
    /* the MimeMagicFile directive is handled by set_magicfile() */
    AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF,
     "Path to MIME Magic file (in file(1) format)"),
    {NULL}  /* last entry; presumably the end-of-table marker - TODO confirm */
};
515 
516 /*
517  * RSL (result string list) processing routines
518  *
519  * These collect strings that would have been printed in fragments by file(1)
520  * into a list of magic_rsl structures with the strings. When complete,
521  * they're concatenated together to become the MIME content and encoding
522  * types.
523  *
 * Return value conventions for these functions:
 *   functions which return int:      failure = -1, other = result
 *   functions which return pointers: failure = 0,  other = result
527  */
528 
529 /* allocate a per-request structure and put it in the request record */
magic_set_config(request_rec * r)530 static magic_req_rec *magic_set_config(request_rec *r)
531 {
532     magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
533                                                       sizeof(magic_req_rec));
534 
535     req_dat->head = req_dat->tail = (magic_rsl *) NULL;
536     ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
537     return req_dat;
538 }
539 
540 /* add a string to the result string list for this request */
541 /* it is the responsibility of the caller to allocate "str" */
magic_rsl_add(request_rec * r,const char * str)542 static int magic_rsl_add(request_rec *r, const char *str)
543 {
544     magic_req_rec *req_dat = (magic_req_rec *)
545                     ap_get_module_config(r->request_config, &mime_magic_module);
546     magic_rsl *rsl;
547 
548     /* make sure we have a list to put it in */
549     if (!req_dat) {
550         ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, APLOGNO(01507)
551                     MODNAME ": request config should not be NULL");
552         if (!(req_dat = magic_set_config(r))) {
553             /* failure */
554             return -1;
555         }
556     }
557 
558     /* allocate the list entry */
559     rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
560 
561     /* fill it */
562     rsl->str = str;
563     rsl->next = (magic_rsl *) NULL;
564 
565     /* append to the list */
566     if (req_dat->head && req_dat->tail) {
567         req_dat->tail->next = rsl;
568         req_dat->tail = rsl;
569     }
570     else {
571         req_dat->head = req_dat->tail = rsl;
572     }
573 
574     /* success */
575     return 0;
576 }
577 
578 /* RSL hook for puts-type functions */
magic_rsl_puts(request_rec * r,const char * str)579 static int magic_rsl_puts(request_rec *r, const char *str)
580 {
581     return magic_rsl_add(r, str);
582 }
583 
584 /* RSL hook for printf-type functions */
magic_rsl_printf(request_rec * r,char * str,...)585 static int magic_rsl_printf(request_rec *r, char *str,...)
586 {
587     va_list ap;
588 
589     char buf[MAXMIMESTRING];
590 
591     /* assemble the string into the buffer */
592     va_start(ap, str);
593     apr_vsnprintf(buf, sizeof(buf), str, ap);
594     va_end(ap);
595 
596     /* add the buffer to the list */
597     return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
598 }
599 
600 /* RSL hook for putchar-type functions */
magic_rsl_putchar(request_rec * r,char c)601 static int magic_rsl_putchar(request_rec *r, char c)
602 {
603     char str[2];
604 
605     /* high overhead for 1 char - just hope they don't do this much */
606     str[0] = c;
607     str[1] = '\0';
608     return magic_rsl_add(r, apr_pstrdup(r->pool, str));
609 }
610 
611 /* allocate and copy a contiguous string from a result string list */
rsl_strdup(request_rec * r,int start_frag,int start_pos,int len)612 static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
613 {
614     char *result;       /* return value */
615     int cur_frag,       /* current fragment number/counter */
616         cur_pos,        /* current position within fragment */
617         res_pos;        /* position in result string */
618     magic_rsl *frag;    /* list-traversal pointer */
619     magic_req_rec *req_dat = (magic_req_rec *)
620                     ap_get_module_config(r->request_config, &mime_magic_module);
621 
622     /* allocate the result string */
623     result = (char *) apr_palloc(r->pool, len + 1);
624 
625     /* loop through and collect the string */
626     res_pos = 0;
627     for (frag = req_dat->head, cur_frag = 0;
628          frag->next;
629          frag = frag->next, cur_frag++) {
630         /* loop to the first fragment */
631         if (cur_frag < start_frag)
632             continue;
633 
634         /* loop through and collect chars */
635         for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
636              frag->str[cur_pos];
637              cur_pos++) {
638             if (cur_frag >= start_frag
639                 && cur_pos >= start_pos
640                 && res_pos <= len) {
641                 result[res_pos++] = frag->str[cur_pos];
642                 if (res_pos > len) {
643                     break;
644                 }
645             }
646         }
647     }
648 
649     /* clean up and return */
650     result[res_pos] = 0;
651 #if MIME_MAGIC_DEBUG
652     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01508)
653              MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result);
654 #endif
655     return result;
656 }
657 
/*
 * States for the state-machine algorithm in magic_rsl_to_request().
 * The order matters: the parser moves on by stepping to the next value.
 */
typedef enum {
    rsl_leading_space,   /* before the first non-space character */
    rsl_type,            /* inside the MIME type, before the '/' */
    rsl_subtype,         /* inside the MIME type, after the '/' */
    rsl_separator,       /* whitespace after the MIME type */
    rsl_encoding         /* inside the MIME encoding */
} rsl_states;
662 
663 /* process the RSL and set the MIME info in the request record */
magic_rsl_to_request(request_rec * r)664 static int magic_rsl_to_request(request_rec *r)
665 {
666     int cur_frag,         /* current fragment number/counter */
667         cur_pos,          /* current position within fragment */
668         type_frag,        /* content type starting point: fragment */
669         type_pos,         /* content type starting point: position */
670         type_len,         /* content type length */
671         encoding_frag,    /* content encoding starting point: fragment */
672         encoding_pos,     /* content encoding starting point: position */
673         encoding_len;     /* content encoding length */
674 
675     char *tmp;
676     magic_rsl *frag;      /* list-traversal pointer */
677     rsl_states state;
678 
679     magic_req_rec *req_dat = (magic_req_rec *)
680                     ap_get_module_config(r->request_config, &mime_magic_module);
681 
682     /* check if we have a result */
683     if (!req_dat || !req_dat->head) {
684         /* empty - no match, we defer to other Apache modules */
685         return DECLINED;
686     }
687 
688     /* start searching for the type and encoding */
689     state = rsl_leading_space;
690     type_frag = type_pos = type_len = 0;
691     encoding_frag = encoding_pos = encoding_len = 0;
692     for (frag = req_dat->head, cur_frag = 0;
693          frag && frag->next;
694          frag = frag->next, cur_frag++) {
695         /* loop through the characters in the fragment */
696         for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
697             if (apr_isspace(frag->str[cur_pos])) {
698                 /* process whitespace actions for each state */
699                 if (state == rsl_leading_space) {
700                     /* eat whitespace in this state */
701                     continue;
702                 }
703                 else if (state == rsl_type) {
704                     /* whitespace: type has no slash! */
705                     return DECLINED;
706                 }
707                 else if (state == rsl_subtype) {
708                     /* whitespace: end of MIME type */
709                     state++;
710                     continue;
711                 }
712                 else if (state == rsl_separator) {
713                     /* eat whitespace in this state */
714                     continue;
715                 }
716                 else if (state == rsl_encoding) {
717                     /* whitespace: end of MIME encoding */
718                     /* we're done */
719                     frag = req_dat->tail;
720                     break;
721                 }
722                 else {
723                     /* should not be possible */
724                     /* abandon malfunctioning module */
725                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01509)
726                                 MODNAME ": bad state %d (ws)", state);
727                     return DECLINED;
728                 }
729                 /* NOTREACHED */
730             }
731             else if (state == rsl_type &&
732                      frag->str[cur_pos] == '/') {
733                 /* copy the char and go to rsl_subtype state */
734                 type_len++;
735                 state++;
736             }
737             else {
738                 /* process non-space actions for each state */
739                 if (state == rsl_leading_space) {
740                     /* non-space: begin MIME type */
741                     state++;
742                     type_frag = cur_frag;
743                     type_pos = cur_pos;
744                     type_len = 1;
745                     continue;
746                 }
747                 else if (state == rsl_type ||
748                          state == rsl_subtype) {
749                     /* non-space: adds to type */
750                     type_len++;
751                     continue;
752                 }
753                 else if (state == rsl_separator) {
754                     /* non-space: begin MIME encoding */
755                     state++;
756                     encoding_frag = cur_frag;
757                     encoding_pos = cur_pos;
758                     encoding_len = 1;
759                     continue;
760                 }
761                 else if (state == rsl_encoding) {
762                     /* non-space: adds to encoding */
763                     encoding_len++;
764                     continue;
765                 }
766                 else {
767                     /* should not be possible */
768                     /* abandon malfunctioning module */
769                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01510)
770                                 MODNAME ": bad state %d (ns)", state);
771                     return DECLINED;
772                 }
773                 /* NOTREACHED */
774             }
775             /* NOTREACHED */
776         }
777     }
778 
779     /* if we ended prior to state rsl_subtype, we had incomplete info */
780     if (state != rsl_subtype && state != rsl_separator &&
781         state != rsl_encoding) {
782         /* defer to other modules */
783         return DECLINED;
784     }
785 
786     /* save the info in the request record */
787     tmp = rsl_strdup(r, type_frag, type_pos, type_len);
788     /* XXX: this could be done at config time I'm sure... but I'm
789      * confused by all this magic_rsl stuff. -djg */
790     ap_content_type_tolower(tmp);
791     ap_set_content_type(r, tmp);
792 
793     if (state == rsl_encoding) {
794         tmp = rsl_strdup(r, encoding_frag,
795                                          encoding_pos, encoding_len);
796         /* XXX: this could be done at config time I'm sure... but I'm
797          * confused by all this magic_rsl stuff. -djg */
798         ap_str_tolower(tmp);
799         r->content_encoding = tmp;
800     }
801 
802     /* detect memory allocation or other errors */
803     if (!r->content_type ||
804         (state == rsl_encoding && !r->content_encoding)) {
805         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511)
806                       MODNAME ": unexpected state %d; could be caused by bad "
807                       "data in magic file",
808                       state);
809         return HTTP_INTERNAL_SERVER_ERROR;
810     }
811 
812     /* success! */
813     return OK;
814 }
815 
/*
 * magic_process - process the input file
 *   r    Apache API request record
 * (Formerly called "process" in the file command; the prefix was added for
 * clarity.)  Opens the file and reads a fixed-size buffer to begin
 * processing the contents.
 */
/*
 * Determine the MIME info for r->filename and record it in the result
 * string list.
 *
 * fsmagic() is consulted first; when it returns DONE the result is
 * complete.  Otherwise up to HOWMANY bytes are read from the start of the
 * file and handed to tryit().
 *
 * Returns OK when a result was recorded, DECLINED when the file cannot be
 * opened, is empty, or was not recognized, HTTP_INTERNAL_SERVER_ERROR when
 * reading fails, or whatever other status fsmagic()/tryit() report.
 */
static int magic_process(request_rec *r)
{
    apr_file_t *fd = NULL;
    unsigned char buf[HOWMANY + 1];  /* one extra for terminating '\0' */
    apr_size_t nbytes = 0;           /* number of bytes read from a datafile */
    apr_status_t rv;
    int result;

    /*
     * first try judging the file based on its filesystem status
     */
    switch ((result = fsmagic(r, r->filename))) {
    case DONE:
        magic_rsl_putchar(r, '\n');
        return OK;
    case OK:
        break;
    default:
        /* fatal error, bail out */
        return result;
    }

    rv = apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool);
    if (rv != APR_SUCCESS) {
        /* We can't open it, but we were able to stat it. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01512)
                    MODNAME ": can't read `%s'", r->filename);
        /* let some other handler decide what the problem is */
        return DECLINED;
    }

    /*
     * try looking at the first HOWMANY bytes
     */
    nbytes = sizeof(buf) - 1;
    rv = apr_file_read(fd, (char *) buf, &nbytes);

    /*
     * Everything needed from the file is in buf now (fd is not passed to
     * anything else), so close it here instead of leaving it open on the
     * early returns below.
     */
    (void) apr_file_close(fd);

    if (rv != APR_SUCCESS) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01513)
                    MODNAME ": read failed: %s", r->filename);
        return HTTP_INTERNAL_SERVER_ERROR;
    }

    if (nbytes == 0) {
        return DECLINED;
    }

    buf[nbytes++] = '\0';  /* null-terminate it; the count includes the '\0' */
    result = tryit(r, buf, nbytes, 1);
    if (result != OK) {
        return result;
    }

    (void) magic_rsl_putchar(r, '\n');

    return OK;
}
876 
877 
/*
 * Run the content tests on the nb bytes in buf, in order, and stop at the
 * first one that reports a match (returns 1):
 *   1. zmagic()    - compression stuff, only when checkzmagic is 1
 *   2. softmagic() - tests in /etc/magic (or surrogate magic file)
 *   3. ascmagic()  - known keywords, checks for ascii-ness too
 * Returns OK on a match and DECLINED when no test matched.
 */
static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb,
                 int checkzmagic)
{
    if (checkzmagic == 1 && zmagic(r, buf, nb) == 1) {
        return OK;
    }

    if (softmagic(r, buf, nb) == 1) {
        return OK;
    }

    /* last resort; abandon hope if this one fails as well */
    return (ascmagic(r, buf, nb) == 1) ? OK : DECLINED;
}
906 
/*
 * EATAB - advance the character pointer named "l", which must be in scope
 * where the macro is used, past any whitespace characters.
 */
#define    EATAB {while (apr_isspace(*l))  ++l;}
908 
/*
 * apprentice - load configuration from the magic file
 *   s    Apache API server record
 *   p    pool used for the file name and for opening the file
 */
apprentice(server_rec * s,apr_pool_t * p)913 static int apprentice(server_rec *s, apr_pool_t *p)
914 {
915     apr_file_t *f = NULL;
916     apr_status_t result;
917     char line[BUFSIZ + 1];
918     int errs = 0;
919     int lineno;
920 #if MIME_MAGIC_DEBUG
921     int rule = 0;
922     struct magic *m, *prevm;
923 #endif
924     magic_server_config_rec *conf = (magic_server_config_rec *)
925                     ap_get_module_config(s->module_config, &mime_magic_module);
926     const char *fname = ap_server_root_relative(p, conf->magicfile);
927 
928     if (!fname) {
929         ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, APLOGNO(01514)
930                      MODNAME ": Invalid magic file path %s", conf->magicfile);
931         return -1;
932     }
933     if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED,
934                                 APR_OS_DEFAULT, p)) != APR_SUCCESS) {
935         ap_log_error(APLOG_MARK, APLOG_ERR, result, s, APLOGNO(01515)
936                      MODNAME ": can't read magic file %s", fname);
937         return -1;
938     }
939 
940     /* set up the magic list (empty) */
941     conf->magic = conf->last = NULL;
942 
943     /* parse it */
944     for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
945         int ws_offset;
946         char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an
947                                                * "empty" line contains a '\n'
948                                                */
949 
950         /* delete newline and any other trailing whitespace */
951         while (last >= line
952                && apr_isspace(*last)) {
953             *last = '\0';
954             --last;
955         }
956 
957         /* skip leading whitespace */
958         ws_offset = 0;
959         while (line[ws_offset] && apr_isspace(line[ws_offset])) {
960             ws_offset++;
961         }
962 
963         /* skip blank lines */
964         if (line[ws_offset] == 0) {
965             continue;
966         }
967 
968         /* comment, do not parse */
969         if (line[ws_offset] == '#')
970             continue;
971 
972 #if MIME_MAGIC_DEBUG
973         /* if we get here, we're going to use it so count it */
974         rule++;
975 #endif
976 
977         /* parse it */
978         if (parse(s, p, line + ws_offset, lineno) != 0)
979             ++errs;
980     }
981 
982     (void) apr_file_close(f);
983 
984 #if MIME_MAGIC_DEBUG
985     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01516)
986                 MODNAME ": apprentice conf=%pp file=%s m=%s m->next=%s last=%s",
987                 conf,
988                 conf->magicfile ? conf->magicfile : "NULL",
989                 conf->magic ? "set" : "NULL",
990                 (conf->magic && conf->magic->next) ? "set" : "NULL",
991                 conf->last ? "set" : "NULL");
992     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01517)
993                 MODNAME ": apprentice read %d lines, %d rules, %d errors",
994                 lineno, rule, errs);
995 #endif
996 
997 #if MIME_MAGIC_DEBUG
998     prevm = 0;
999     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01518)
1000                 MODNAME ": apprentice test");
1001     for (m = conf->magic; m; m = m->next) {
1002         if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1003             apr_isprint((((unsigned long) m) >> 16) & 255) &&
1004             apr_isprint((((unsigned long) m) >> 8) & 255) &&
1005             apr_isprint(((unsigned long) m) & 255)) {
1006             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01519)
1007                         MODNAME ": apprentice: POINTER CLOBBERED! "
1008                         "m=\"%c%c%c%c\" line=%d",
1009                         (((unsigned long) m) >> 24) & 255,
1010                         (((unsigned long) m) >> 16) & 255,
1011                         (((unsigned long) m) >> 8) & 255,
1012                         ((unsigned long) m) & 255,
1013                         prevm ? prevm->lineno : -1);
1014             break;
1015         }
1016         prevm = m;
1017     }
1018 #endif
1019 
1020     return (errs ? -1 : 0);
1021 }
1022 
1023 /*
1024  * extend the sign bit if the comparison is to be signed
1025  */
signextend(server_rec * s,struct magic * m,unsigned long v)1026 static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
1027 {
1028     if (!(m->flag & UNSIGNED))
1029         switch (m->type) {
1030             /*
1031              * Do not remove the casts below.  They are vital. When later
1032              * compared with the data, the sign extension must have happened.
1033              */
1034         case BYTE:
1035             v = (char) v;
1036             break;
1037         case SHORT:
1038         case BESHORT:
1039         case LESHORT:
1040             v = (short) v;
1041             break;
1042         case DATE:
1043         case BEDATE:
1044         case LEDATE:
1045         case LONG:
1046         case BELONG:
1047         case LELONG:
1048             v = (long) v;
1049             break;
1050         case STRING:
1051             break;
1052         default:
1053             ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(01520)
1054                         MODNAME ": can't happen: m->type=%d", m->type);
1055             return -1;
1056         }
1057     return v;
1058 }
1059 
1060 /*
1061  * parse one line from magic file, put into magic[index++] if valid
1062  */
parse(server_rec * serv,apr_pool_t * p,char * l,int lineno)1063 static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno)
1064 {
1065     struct magic *m;
1066     char *t, *s;
1067     magic_server_config_rec *conf = (magic_server_config_rec *)
1068                     ap_get_module_config(serv->module_config, &mime_magic_module);
1069 
1070     /* allocate magic structure entry */
1071     m = (struct magic *) apr_pcalloc(p, sizeof(struct magic));
1072 
1073     /* append to linked list */
1074     m->next = NULL;
1075     if (!conf->magic || !conf->last) {
1076         conf->magic = conf->last = m;
1077     }
1078     else {
1079         conf->last->next = m;
1080         conf->last = m;
1081     }
1082 
1083     /* set values in magic structure */
1084     m->flag = 0;
1085     m->cont_level = 0;
1086     m->lineno = lineno;
1087 
1088     while (*l == '>') {
1089         ++l;  /* step over */
1090         m->cont_level++;
1091     }
1092 
1093     if (m->cont_level != 0 && *l == '(') {
1094         ++l;  /* step over */
1095         m->flag |= INDIR;
1096     }
1097 
1098     /* get offset, then skip over it */
1099     m->offset = (int) strtol(l, &t, 0);
1100     if (l == t) {
1101         ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01521)
1102                     MODNAME ": offset %s invalid", l);
1103     }
1104     l = t;
1105 
1106     if (m->flag & INDIR) {
1107         m->in.type = LONG;
1108         m->in.offset = 0;
1109         /*
1110          * read [.lbs][+-]nnnnn)
1111          */
1112         if (*l == '.') {
1113             switch (*++l) {
1114             case 'l':
1115                 m->in.type = LONG;
1116                 break;
1117             case 's':
1118                 m->in.type = SHORT;
1119                 break;
1120             case 'b':
1121                 m->in.type = BYTE;
1122                 break;
1123             default:
1124                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01522)
1125                         MODNAME ": indirect offset type %c invalid", *l);
1126                 break;
1127             }
1128             l++;
1129         }
1130         s = l;
1131         if (*l == '+' || *l == '-')
1132             l++;
1133         if (apr_isdigit((unsigned char) *l)) {
1134             m->in.offset = strtol(l, &t, 0);
1135             if (*s == '-')
1136                 m->in.offset = -m->in.offset;
1137         }
1138         else
1139             t = l;
1140         if (*t++ != ')') {
1141             ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01523)
1142                         MODNAME ": missing ')' in indirect offset");
1143         }
1144         l = t;
1145     }
1146 
1147 
1148     while (apr_isdigit((unsigned char) *l))
1149         ++l;
1150     EATAB;
1151 
1152 #define NBYTE           4
1153 #define NSHORT          5
1154 #define NLONG           4
1155 #define NSTRING         6
1156 #define NDATE           4
1157 #define NBESHORT        7
1158 #define NBELONG         6
1159 #define NBEDATE         6
1160 #define NLESHORT        7
1161 #define NLELONG         6
1162 #define NLEDATE         6
1163 
1164     if (*l == 'u') {
1165         ++l;
1166         m->flag |= UNSIGNED;
1167     }
1168 
1169     /* get type, skip it */
1170     if (strncmp(l, "byte", NBYTE) == 0) {
1171         m->type = BYTE;
1172         l += NBYTE;
1173     }
1174     else if (strncmp(l, "short", NSHORT) == 0) {
1175         m->type = SHORT;
1176         l += NSHORT;
1177     }
1178     else if (strncmp(l, "long", NLONG) == 0) {
1179         m->type = LONG;
1180         l += NLONG;
1181     }
1182     else if (strncmp(l, "string", NSTRING) == 0) {
1183         m->type = STRING;
1184         l += NSTRING;
1185     }
1186     else if (strncmp(l, "date", NDATE) == 0) {
1187         m->type = DATE;
1188         l += NDATE;
1189     }
1190     else if (strncmp(l, "beshort", NBESHORT) == 0) {
1191         m->type = BESHORT;
1192         l += NBESHORT;
1193     }
1194     else if (strncmp(l, "belong", NBELONG) == 0) {
1195         m->type = BELONG;
1196         l += NBELONG;
1197     }
1198     else if (strncmp(l, "bedate", NBEDATE) == 0) {
1199         m->type = BEDATE;
1200         l += NBEDATE;
1201     }
1202     else if (strncmp(l, "leshort", NLESHORT) == 0) {
1203         m->type = LESHORT;
1204         l += NLESHORT;
1205     }
1206     else if (strncmp(l, "lelong", NLELONG) == 0) {
1207         m->type = LELONG;
1208         l += NLELONG;
1209     }
1210     else if (strncmp(l, "ledate", NLEDATE) == 0) {
1211         m->type = LEDATE;
1212         l += NLEDATE;
1213     }
1214     else {
1215         ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01524)
1216                     MODNAME ": type %s invalid", l);
1217         return -1;
1218     }
1219     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1220     if (*l == '&') {
1221         ++l;
1222         m->mask = signextend(serv, m, strtol(l, &l, 0));
1223     }
1224     else
1225         m->mask = ~0L;
1226     EATAB;
1227 
1228     switch (*l) {
1229     case '>':
1230     case '<':
1231         /* Old-style anding: "0 byte &0x80 dynamically linked" */
1232     case '&':
1233     case '^':
1234     case '=':
1235         m->reln = *l;
1236         ++l;
1237         break;
1238     case '!':
1239         if (m->type != STRING) {
1240             m->reln = *l;
1241             ++l;
1242             break;
1243         }
1244         /* FALL THROUGH */
1245     default:
1246         if (*l == 'x' && apr_isspace(l[1])) {
1247             m->reln = *l;
1248             ++l;
1249             goto GetDesc;  /* Bill The Cat */
1250         }
1251         m->reln = '=';
1252         break;
1253     }
1254     EATAB;
1255 
1256     if (getvalue(serv, m, &l))
1257         return -1;
1258     /*
1259      * now get last part - the description
1260      */
1261   GetDesc:
1262     EATAB;
1263     if (l[0] == '\b') {
1264         ++l;
1265         m->nospflag = 1;
1266     }
1267     else if ((l[0] == '\\') && (l[1] == 'b')) {
1268         ++l;
1269         ++l;
1270         m->nospflag = 1;
1271     }
1272     else
1273         m->nospflag = 0;
1274     apr_cpystrn(m->desc, l, sizeof(m->desc));
1275 
1276 #if MIME_MAGIC_DEBUG
1277     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, APLOGNO(01525)
1278                 MODNAME ": parse line=%d m=%pp next=%pp cont=%d desc=%s",
1279                 lineno, m, m->next, m->cont_level, m->desc);
1280 #endif /* MIME_MAGIC_DEBUG */
1281 
1282     return 0;
1283 }
1284 
1285 /*
1286  * Read a numeric value from a pointer, into the value union of a magic
1287  * pointer, according to the magic type.  Update the string pointer to point
1288  * just after the number read.  Return 0 for success, non-zero for failure.
1289  */
getvalue(server_rec * s,struct magic * m,char ** p)1290 static int getvalue(server_rec *s, struct magic *m, char **p)
1291 {
1292     int slen;
1293 
1294     if (m->type == STRING) {
1295         *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
1296         m->vallen = slen;
1297     }
1298     else if (m->reln != 'x')
1299         m->value.l = signextend(s, m, strtol(*p, p, 0));
1300     return 0;
1301 }
1302 
1303 /*
1304  * Convert a string containing C character escapes.  Stop at an unescaped
1305  * space or tab. Copy the converted version to "p", returning its length in
1306  * *slen. Return updated scan pointer as function result.
1307  */
getstr(server_rec * serv,register char * s,register char * p,int plen,int * slen)1308 static char *getstr(server_rec *serv, register char *s, register char *p,
1309                     int plen, int *slen)
1310 {
1311     char *origs = s, *origp = p;
1312     char *pmax = p + plen - 1;
1313     register int c;
1314     register int val;
1315 
1316     while ((c = *s++) != '\0') {
1317         if (apr_isspace(c))
1318             break;
1319         if (p >= pmax) {
1320             ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01526)
1321                         MODNAME ": string too long: %s", origs);
1322             break;
1323         }
1324         if (c == '\\') {
1325             switch (c = *s++) {
1326 
1327             case '\0':
1328                 goto out;
1329 
1330             default:
1331                 *p++ = (char) c;
1332                 break;
1333 
1334             case 'n':
1335                 *p++ = '\n';
1336                 break;
1337 
1338             case 'r':
1339                 *p++ = '\r';
1340                 break;
1341 
1342             case 'b':
1343                 *p++ = '\b';
1344                 break;
1345 
1346             case 't':
1347                 *p++ = '\t';
1348                 break;
1349 
1350             case 'f':
1351                 *p++ = '\f';
1352                 break;
1353 
1354             case 'v':
1355                 *p++ = '\v';
1356                 break;
1357 
1358                 /* \ and up to 3 octal digits */
1359             case '0':
1360             case '1':
1361             case '2':
1362             case '3':
1363             case '4':
1364             case '5':
1365             case '6':
1366             case '7':
1367                 val = c - '0';
1368                 c = *s++;  /* try for 2 */
1369                 if (c >= '0' && c <= '7') {
1370                     val = (val << 3) | (c - '0');
1371                     c = *s++;  /* try for 3 */
1372                     if (c >= '0' && c <= '7')
1373                         val = (val << 3) | (c - '0');
1374                     else
1375                         --s;
1376                 }
1377                 else
1378                     --s;
1379                 *p++ = (char) val;
1380                 break;
1381 
1382                 /* \x and up to 3 hex digits */
1383             case 'x':
1384                 val = 'x';            /* Default if no digits */
1385                 c = hextoint(*s++);   /* Get next char */
1386                 if (c >= 0) {
1387                     val = c;
1388                     c = hextoint(*s++);
1389                     if (c >= 0) {
1390                         val = (val << 4) + c;
1391                         c = hextoint(*s++);
1392                         if (c >= 0) {
1393                             val = (val << 4) + c;
1394                         }
1395                         else
1396                             --s;
1397                     }
1398                     else
1399                         --s;
1400                 }
1401                 else
1402                     --s;
1403                 *p++ = (char) val;
1404                 break;
1405             }
1406         }
1407         else
1408             *p++ = (char) c;
1409     }
1410   out:
1411     *p = '\0';
1412     *slen = p - origp;
1413     return s;
1414 }
1415 
1416 
/*
 * hextoint - value of a single hexadecimal digit character
 *
 * c  character code to convert
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for everything
 * else.  The digit test is a plain range comparison on the full int, so a
 * value outside the character range (negative, or above 255) is rejected
 * instead of being truncated to a byte first and mistaken for a digit.
 */
static int hextoint(int c)
{
    if ((c >= '0') && (c <= '9'))
        return c - '0';
    if ((c >= 'a') && (c <= 'f'))
        return c + 10 - 'a';
    if ((c >= 'A') && (c <= 'F'))
        return c + 10 - 'A';
    return -1;
}
1428 
1429 
1430 /*
1431  * return DONE to indicate it's been handled
1432  * return OK to indicate it's a regular file still needing handling
1433  * other returns indicate a failure of some sort
1434  */
fsmagic(request_rec * r,const char * fn)1435 static int fsmagic(request_rec *r, const char *fn)
1436 {
1437     switch (r->finfo.filetype) {
1438     case APR_DIR:
1439         magic_rsl_puts(r, DIR_MAGIC_TYPE);
1440         return DONE;
1441     case APR_CHR:
1442         /*
1443          * (void) magic_rsl_printf(r,"character special (%d/%d)",
1444          * major(sb->st_rdev), minor(sb->st_rdev));
1445          */
1446         (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1447         return DONE;
1448     case APR_BLK:
1449         /*
1450          * (void) magic_rsl_printf(r,"block special (%d/%d)",
1451          * major(sb->st_rdev), minor(sb->st_rdev));
1452          */
1453         (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1454         return DONE;
1455         /* TODO add code to handle V7 MUX and Blit MUX files */
1456     case APR_PIPE:
1457         /*
1458          * magic_rsl_puts(r,"fifo (named pipe)");
1459          */
1460         (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1461         return DONE;
1462     case APR_LNK:
1463         /* We used stat(), the only possible reason for this is that the
1464          * symlink is broken.
1465          */
1466         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01527)
1467                     MODNAME ": broken symlink (%s)", fn);
1468         return HTTP_INTERNAL_SERVER_ERROR;
1469     case APR_SOCK:
1470         magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1471         return DONE;
1472     case APR_REG:
1473         break;
1474     default:
1475         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01528)
1476                       MODNAME ": invalid file type %d.", r->finfo.filetype);
1477         return HTTP_INTERNAL_SERVER_ERROR;
1478     }
1479 
1480     /*
1481      * regular file, check next possibility
1482      */
1483     if (r->finfo.size == 0) {
1484         magic_rsl_puts(r, MIME_TEXT_UNKNOWN);
1485         return DONE;
1486     }
1487     return OK;
1488 }
1489 
1490 /*
1491  * softmagic - lookup one file in database (already read from /etc/magic by
1492  * apprentice.c). Passed the name and FILE * of one file to be typed.
1493  */
1494                 /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */
softmagic(request_rec * r,unsigned char * buf,apr_size_t nbytes)1495 static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1496 {
1497     if (match(r, buf, nbytes))
1498         return 1;
1499 
1500     return 0;
1501 }
1502 
/*
 * Go through the whole list, stopping if you find a match.  Process all the
 * continuations of that match before returning.
 *
 * We support multi-level continuations:
 *
 * At any time when processing a successful top-level match, there is a current
 * continuation level; it represents the level of the last successfully
 * matched continuation.
 *
 * Continuations above that level are skipped as, if we see one, it means that
 * the continuation that controls them - i.e, the lower-level continuation
 * preceding them - failed to match.
 *
 * Continuations below that level are processed as, if we see one, it means
 * we've finished processing or skipping higher-level continuations under the
 * control of a successful or unsuccessful lower-level continuation, and are
 * now seeing the next lower-level continuation and should process it.  The
 * current continuation level reverts to the level of the one we're seeing.
 *
 * Continuations at the current level are processed as, if we see one, there's
 * no lower-level continuation that may have failed.
 *
 * If a continuation matches, we bump the current continuation level so that
 * higher-level continuations are processed.
 *
 * r       request; its server's mime_magic configuration holds the rule list
 * s       buffer holding the data to examine
 * nbytes  number of bytes available in s
 *
 * Returns 1 once a top-level rule has matched and its continuations have
 * been processed, 0 if no top-level rule matched.
 */
static int match(request_rec *r, unsigned char *s, apr_size_t nbytes)
{
#if MIME_MAGIC_DEBUG
    int rule_counter = 0;
#endif
    int cont_level = 0;
    int need_separator = 0;
    union VALUETYPE p;
    magic_server_config_rec *conf = (magic_server_config_rec *)
                ap_get_module_config(r->server->module_config, &mime_magic_module);
    struct magic *m;

#if MIME_MAGIC_DEBUG
    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01529)
                MODNAME ": match conf=%pp file=%s m=%s m->next=%s last=%s",
                conf,
                conf->magicfile ? conf->magicfile : "NULL",
                conf->magic ? "set" : "NULL",
                (conf->magic && conf->magic->next) ? "set" : "NULL",
                conf->last ? "set" : "NULL");
#endif

#if MIME_MAGIC_DEBUG
    /*
     * Sanity walk over the list: a link whose low four bytes are all
     * printable characters is reported as overwritten by text.
     */
    for (m = conf->magic; m; m = m->next) {
        if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
            apr_isprint((((unsigned long) m) >> 16) & 255) &&
            apr_isprint((((unsigned long) m) >> 8) & 255) &&
            apr_isprint(((unsigned long) m) & 255)) {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01530)
                        MODNAME ": match: POINTER CLOBBERED! "
                        "m=\"%c%c%c%c\"",
                        (((unsigned long) m) >> 24) & 255,
                        (((unsigned long) m) >> 16) & 255,
                        (((unsigned long) m) >> 8) & 255,
                        ((unsigned long) m) & 255);
            break;
        }
    }
#endif

    /* each pass of this loop starts at a top-level (cont_level 0) entry */
    for (m = conf->magic; m; m = m->next) {
#if MIME_MAGIC_DEBUG
        rule_counter++;
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01531)
                    MODNAME ": line=%d desc=%s", m->lineno, m->desc);
#endif

        /* check if main entry matches */
        if (!mget(r, &p, s, m, nbytes) ||
            !mcheck(r, &p, m)) {
            struct magic *m_cont;

            /*
             * main entry didn't match, flush its continuations
             */
            if (!m->next || (m->next->cont_level == 0)) {
                /* no continuations follow: just go on to the next entry */
                continue;
            }

            m_cont = m->next;
            while (m_cont && (m_cont->cont_level != 0)) {
#if MIME_MAGIC_DEBUG
                rule_counter++;
                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01532)
                        MODNAME ": line=%d mc=%pp mc->next=%pp cont=%d desc=%s",
                            m_cont->lineno, m_cont,
                            m_cont->next, m_cont->cont_level,
                            m_cont->desc);
#endif
                /*
                 * this trick allows us to keep *m in sync when the continue
                 * advances the pointer
                 */
                m = m_cont;
                m_cont = m_cont->next;
            }
            /* m is now the last skipped continuation; the for() steps past it */
            continue;
        }

        /* if we get here, the main entry rule was a match */
        /* this will be the last run through the loop */
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01533)
                    MODNAME ": rule matched, line=%d type=%d %s",
                    m->lineno, m->type,
                    (m->type == STRING) ? m->value.s : "");
#endif

        /* print the match */
        mprint(r, &p, m);

        /*
         * If we printed something, we'll need to print a blank before we
         * print something else.
         */
        if (m->desc[0])
            need_separator = 1;
        /* and any continuations that match */
        cont_level++;
        /*
         * while (m && m->next && m->next->cont_level != 0 && ( m = m->next
         * ))
         */
        m = m->next;
        while (m && (m->cont_level != 0)) {
#if MIME_MAGIC_DEBUG
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01534)
                        MODNAME ": match line=%d cont=%d type=%d %s",
                        m->lineno, m->cont_level, m->type,
                        (m->type == STRING) ? m->value.s : "");
#endif
            /* continuations deeper than the current level are skipped */
            if (cont_level >= m->cont_level) {
                if (cont_level > m->cont_level) {
                    /*
                     * We're at the end of the level "cont_level"
                     * continuations.
                     */
                    cont_level = m->cont_level;
                }
                if (mget(r, &p, s, m, nbytes) &&
                    mcheck(r, &p, m)) {
                    /*
                     * This continuation matched. Print its message, with a
                     * blank before it if the previous item printed and this
                     * item isn't empty.
                     */
                    /* space if previous printed */
                    if (need_separator
                        && (m->nospflag == 0)
                        && (m->desc[0] != '\0')
                        ) {
                        (void) magic_rsl_putchar(r, ' ');
                        need_separator = 0;
                    }
                    mprint(r, &p, m);
                    if (m->desc[0])
                        need_separator = 1;

                    /*
                     * If we see any continuations at a higher level, process
                     * them.
                     */
                    cont_level++;
                }
            }

            /* move to next continuation record */
            m = m->next;
        }
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01535)
                    MODNAME ": matched after %d rules", rule_counter);
#endif
        return 1;  /* all through */
    }
#if MIME_MAGIC_DEBUG
    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01536)
                MODNAME ": failed after %d rules", rule_counter);
#endif
    return 0;  /* no match at all */
}
1690 
/*
 * mprint - emit the description of a matched rule
 *
 * r  request the result is written to (via magic_rsl_printf)
 * p  value that was read from the data for this rule
 * m  the matched rule; m->desc is used as the printf-style format
 *
 * Numeric types pass the sign-extended, masked value as the format
 * argument.  STRING rules pass the rule's own string when the relation is
 * '=', otherwise the string read from the data.  Date types pass the
 * value, taken as a number of seconds, formatted by apr_ctime().  An
 * unknown type is logged and nothing is emitted.
 */
static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m)
{
    unsigned long v;
    char time_str[APR_CTIME_LEN];

    switch (m->type) {
    case BYTE:
        v = p->b;
        break;

    case SHORT:
    case BESHORT:
    case LESHORT:
        v = p->h;
        break;

    case LONG:
    case BELONG:
    case LELONG:
        v = p->l;
        break;

    case STRING:
        if (m->reln == '=') {
            (void) magic_rsl_printf(r, m->desc, m->value.s);
        }
        else {
            (void) magic_rsl_printf(r, m->desc, p->s);
        }
        return;

    case DATE:
    case BEDATE:
    case LEDATE:
        /*
         * Convert the value itself rather than re-reading its storage
         * through a time_t pointer: time_t need not have the size of the
         * "l" member, and the pointer cast is an aliasing violation.
         */
        apr_ctime(time_str, apr_time_from_sec((long) p->l));
        (void) magic_rsl_printf(r, m->desc, time_str);
        return;
    default:
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01537)
                    MODNAME ": invalid m->type (%d) in mprint().",
                    m->type);
        return;
    }

    v = signextend(r->server, m, v) & m->mask;
    (void) magic_rsl_printf(r, m->desc, (unsigned long) v);
}
1740 
1741 /*
1742  * Convert the byte order of the data we are looking at
1743  */
mconvert(request_rec * r,union VALUETYPE * p,struct magic * m)1744 static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m)
1745 {
1746     char *rt;
1747 
1748     switch (m->type) {
1749     case BYTE:
1750     case SHORT:
1751     case LONG:
1752     case DATE:
1753         return 1;
1754     case STRING:
1755         /* Null terminate and eat the return */
1756         p->s[sizeof(p->s) - 1] = '\0';
1757         if ((rt = strchr(p->s, '\n')) != NULL)
1758             *rt = '\0';
1759         return 1;
1760     case BESHORT:
1761         p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
1762         return 1;
1763     case BELONG:
1764     case BEDATE:
1765         p->l = (long)
1766             ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
1767         return 1;
1768     case LESHORT:
1769         p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
1770         return 1;
1771     case LELONG:
1772     case LEDATE:
1773         p->l = (long)
1774             ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
1775         return 1;
1776     default:
1777         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01538)
1778                     MODNAME ": invalid type %d in mconvert().", m->type);
1779         return 0;
1780     }
1781 }
1782 
1783 
/*
 * mget - fetch the value a rule refers to from the data buffer
 *
 * r       request, passed on to mconvert() for error logging
 * p       receives sizeof(union VALUETYPE) bytes copied from the buffer,
 *         converted by mconvert()
 * s       start of the data buffer
 * m       rule supplying the offset and, for INDIR rules, the type and
 *         displacement of the indirection
 * nbytes  number of valid bytes in s
 *
 * For INDIR rules the value read at the rule's offset is itself taken as
 * an offset (plus m->in.offset) and a second read is made from there.
 *
 * Returns 1 on success, 0 if an offset does not leave room for a whole
 * union VALUETYPE inside the buffer or if mconvert() fails.
 */
static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s,
                struct magic *m, apr_size_t nbytes)
{
    long offset = m->offset;

    /*
     * Reject negative offsets explicitly and compare without adding to the
     * offset: "offset + sizeof(...)" is evaluated unsigned, so a negative
     * offset would wrap around, pass the test and read before the buffer.
     */
    if (offset < 0 || nbytes < sizeof(union VALUETYPE)
        || (apr_size_t) offset > nbytes - sizeof(union VALUETYPE))
        return 0;

    memcpy(p, s + offset, sizeof(union VALUETYPE));

    if (!mconvert(r, p, m))
        return 0;

    if (m->flag & INDIR) {

        /* the new offset comes from the data, so it must be re-validated */
        switch (m->in.type) {
        case BYTE:
            offset = p->b + m->in.offset;
            break;
        case SHORT:
            offset = p->h + m->in.offset;
            break;
        case LONG:
            offset = p->l + m->in.offset;
            break;
        }

        if (offset < 0 || nbytes < sizeof(union VALUETYPE)
            || (apr_size_t) offset > nbytes - sizeof(union VALUETYPE))
            return 0;

        memcpy(p, s + offset, sizeof(union VALUETYPE));

        if (!mconvert(r, p, m))
            return 0;
    }
    return 1;
}
1821 
/*
 * mcheck() -- compare the value previously fetched into `*p' with the
 * expectation stored in magic entry `m'.
 *
 * The fetched value is picked according to m->type (for STRING the value is
 * the difference of the first mismatching byte pair over m->vallen bytes,
 * compared against 0), passed through signextend() and masked with
 * m->mask, then tested with the relation in m->reln:
 *   'x'  always matches
 *   '!'  not equal            '='  equal
 *   '>'  greater than         '<'  less than
 *        (unsigned when the UNSIGNED flag is set, signed long otherwise)
 *   '&'  every bit of the expected value is set
 *   '^'  at least one bit of the expected value is clear
 *
 * Returns non-zero on a match, 0 otherwise.  An unknown type or relation is
 * logged and treated as "no match".
 */
static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m)
{
    unsigned long expected = m->value.l;
    unsigned long actual;
    int matched;

    if (m->value.s[0] == 'x' && m->value.s[1] == '\0') {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01539)
                    MODNAME ": BOINK");
        return 1;
    }

    switch (m->type) {
    case BYTE:
        actual = p->b;
        break;

    case SHORT:
    case BESHORT:
    case LESHORT:
        actual = p->h;
        break;

    case LONG:
    case BELONG:
    case LELONG:
    case DATE:
    case BEDATE:
    case LEDATE:
        actual = p->l;
        break;

    case STRING: {
        /*
         * Effectively strncmp(m->value.s, p->s, m->vallen), except that
         * embedded NULs do not stop the comparison.
         */
        const unsigned char *want = (unsigned char *) m->value.s;
        const unsigned char *have = (unsigned char *) p->s;
        int count = m->vallen;
        int i;

        expected = 0;
        actual = 0;
        for (i = 0; i < count; i++) {
            actual = have[i] - want[i];
            if (actual != 0)
                break;
        }
        break;
    }

    default:
        /*  bogosity, pretend that it just wasn't a match */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01540)
                    MODNAME ": invalid type %d in mcheck().", m->type);
        return 0;
    }

    actual = signextend(r->server, m, actual) & m->mask;

    switch (m->reln) {
    case 'x':
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01541)
                    "%lu == *any* = 1", actual);
#endif
        matched = 1;
        break;

    case '!':
        matched = (actual != expected);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01542)
                    "%lu != %lu = %d", actual, expected, matched);
#endif
        break;

    case '=':
        matched = (actual == expected);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01543)
                    "%lu == %lu = %d", actual, expected, matched);
#endif
        break;

    case '>':
        matched = (m->flag & UNSIGNED) ? (actual > expected)
                                       : ((long) actual > (long) expected);
#if MIME_MAGIC_DEBUG
        if (m->flag & UNSIGNED) {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01544)
                        "%lu > %lu = %d", actual, expected, matched);
        }
        else {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01545)
                        "%ld > %ld = %d", actual, expected, matched);
        }
#endif
        break;

    case '<':
        matched = (m->flag & UNSIGNED) ? (actual < expected)
                                       : ((long) actual < (long) expected);
#if MIME_MAGIC_DEBUG
        if (m->flag & UNSIGNED) {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01546)
                        "%lu < %lu = %d", actual, expected, matched);
        }
        else {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01547)
                        "%ld < %ld = %d", actual, expected, matched);
        }
#endif
        break;

    case '&':
        matched = ((actual & expected) == expected);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01548)
                    "((%lx & %lx) == %lx) = %d", actual, expected, expected,
                    matched);
#endif
        break;

    case '^':
        matched = ((actual & expected) != expected);
#if MIME_MAGIC_DEBUG
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01549)
                    "((%lx & %lx) != %lx) = %d", actual, expected, expected,
                    matched);
#endif
        break;

    default:
        /* bogosity, pretend it didn't match */
        matched = 0;
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01550)
                    MODNAME ": mcheck: can't happen: invalid relation %d.",
                    m->reln);
        break;
    }

    return matched;
}
1967 
1968 /* an optimization over plain strcmp() */
1969 #define    STREQ(a, b)    (*(a) == *(b) && strcmp((a), (b)) == 0)
1970 
ascmagic(request_rec * r,unsigned char * buf,apr_size_t nbytes)1971 static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1972 {
1973     int has_escapes = 0;
1974     unsigned char *s;
1975     char nbuf[SMALL_HOWMANY + 1];  /* one extra for terminating '\0' */
1976     char *token;
1977     const struct names *p;
1978     int small_nbytes;
1979     char *strtok_state;
1980 
1981     /* these are easy, do them first */
1982 
1983     /*
1984      * for troff, look for . + letter + letter or .\"; this must be done to
1985      * disambiguate tar archives' ./file and other trash from real troff
1986      * input.
1987      */
1988     if (*buf == '.') {
1989         unsigned char *tp = buf + 1;
1990 
1991         while (apr_isspace(*tp))
1992             ++tp;  /* skip leading whitespace */
1993         if ((apr_isalnum(*tp) || *tp == '\\') &&
1994              (apr_isalnum(*(tp + 1)) || *tp == '"')) {
1995             magic_rsl_puts(r, "application/x-troff");
1996             return 1;
1997         }
1998     }
1999     if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) {
2000         /* Fortran */
2001         magic_rsl_puts(r, "text/plain");
2002         return 1;
2003     }
2004 
2005     /* look for tokens from names.h - this is expensive!, so we'll limit
2006      * ourselves to only SMALL_HOWMANY bytes */
2007     small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes;
2008     /* make a copy of the buffer here because apr_strtok() will destroy it */
2009     s = (unsigned char *) memcpy(nbuf, buf, small_nbytes);
2010     s[small_nbytes] = '\0';
2011     has_escapes = (memchr(s, '\033', small_nbytes) != NULL);
2012     while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) {
2013         s = NULL;  /* make apr_strtok() keep on tokin' */
2014         for (p = names; p < names + NNAMES; p++) {
2015             if (STREQ(p->name, token)) {
2016                 magic_rsl_puts(r, types[p->type]);
2017                 if (has_escapes)
2018                     magic_rsl_puts(r, " (with escape sequences)");
2019                 return 1;
2020             }
2021         }
2022     }
2023 
2024     switch (is_tar(buf, nbytes)) {
2025     case 1:
2026         /* V7 tar archive */
2027         magic_rsl_puts(r, "application/x-tar");
2028         return 1;
2029     case 2:
2030         /* POSIX tar archive */
2031         magic_rsl_puts(r, "application/x-tar");
2032         return 1;
2033     }
2034 
2035     /* all else fails, but it is ascii... */
2036     return 0;
2037 }
2038 
2039 
/*
 * compress routines:
 *
 *   zmagic() - returns 0 if the data is not recognized as compressed;
 *              if it is recognized, uncompresses it and records
 *              information about the result.
 *
 *   uncompress(r, method, newch, n) - uncompress the request's file using
 *              method, storing the new buffer in *newch; returns the size
 *              of the new data.
 */
2045 
2046 static const struct {
2047     const char *magic;
2048     apr_size_t maglen;
2049     const char *argv[3];
2050     int silent;
2051     const char *encoding;  /* MUST be lowercase */
2052 } compr[] = {
2053 
2054     /* we use gzip here rather than uncompress because we have to pass
2055      * it a full filename -- and uncompress only considers filenames
2056      * ending with .Z
2057      */
2058     {
2059         "\037\235", 2, {
2060             "gzip", "-dcq", NULL
2061         }, 0, "x-compress"
2062     },
2063     {
2064         "\037\213", 2, {
2065             "gzip", "-dcq", NULL
2066         }, 1, "x-gzip"
2067     },
2068     /*
2069      * XXX pcat does not work, cause I don't know how to make it read stdin,
2070      * so we use gzip
2071      */
2072     {
2073         "\037\036", 2, {
2074             "gzip", "-dcq", NULL
2075         }, 0, "x-gzip"
2076     },
2077 };
2078 
2079 #define ncompr (sizeof(compr) / sizeof(compr[0]))
2080 
/*
 * zmagic() -- detect compressed data by its leading signature.
 *
 * Returns 0 when no entry of compr[] matches the start of buf, or when the
 * data was uncompressed but tryit() did not return OK for it.  Returns 1
 * otherwise, including the case where uncompress() produced no data.
 * When data is produced, r->content_encoding is set to the matching
 * entry's encoding before the uncompressed bytes are examined.
 */
static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
{
    unsigned char *expanded;
    int expanded_len;
    int method;

    /* find the first table row whose signature prefixes the buffer */
    for (method = 0; method < ncompr; method++) {
        if (nbytes >= compr[method].maglen
            && memcmp(buf, compr[method].magic, compr[method].maglen) == 0)
            break;
    }
    if (method == ncompr)
        return 0;

    expanded_len = uncompress(r, method, &expanded, HOWMANY);
    if (expanded_len <= 0)
        return 1;

    /* set encoding type in the request record */
    r->content_encoding = compr[method].encoding;

    /* null-terminate uncompressed data (overwrites its last byte) */
    expanded[expanded_len - 1] = '\0';

    /* Try to detect the content type of the uncompressed data */
    return (tryit(r, expanded, expanded_len, 0) != OK) ? 0 : 1;
}
2109 
2110 
/* Arguments bundled by uncompress() for create_uncompress_child(). */
struct uncompress_parms {
    request_rec *r;   /* request whose r->filename is handed to the decompressor */
    int method;       /* index into compr[] selecting the decompressor command */
};
2115 
/*
 * create_uncompress_child() -- start the decompressor selected by
 * parm->method on parm->r->filename.
 *
 * The child is created from pool `cntxt', runs in the parent directory of
 * the file, and is registered with that pool so it is killed (after a
 * timeout) when the pool is cleaned up.  On success *pipe_in receives the
 * parent's end of the child's stdout pipe.
 *
 * Returns APR_SUCCESS, or the status of the APR call that failed; the same
 * status is what gets logged, so the log shows the real cause.
 */
static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt,
                                   apr_file_t **pipe_in)
{
    apr_status_t rc;
    const char *new_argv[4];
    request_rec *r = parm->r;
    apr_pool_t *child_context = cntxt;
    apr_procattr_t *procattr;
    apr_proc_t *procnew;

    /* XXX missing 1.3 logic:
     *
     * what happens when !compr[parm->method].silent?
     * Should we create the err pipe, read it, and copy to the log?
     */

    /* set up the process attributes, stopping at the first failure */
    rc = apr_procattr_create(&procattr, child_context);
    if (rc == APR_SUCCESS) {
        rc = apr_procattr_io_set(procattr, APR_FULL_BLOCK,
                                 APR_FULL_BLOCK, APR_NO_PIPE);
    }
    if (rc == APR_SUCCESS) {
        rc = apr_procattr_dir_set(procattr,
                                  ap_make_dirstr_parent(r->pool, r->filename));
    }
    if (rc == APR_SUCCESS) {
        rc = apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH);
    }
    if (rc != APR_SUCCESS) {
        /* Something bad happened, tell the world. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(01551)
               "couldn't setup child process: %s", r->filename);
        return rc;
    }

    /* program and option come from the table, the file name from the request */
    new_argv[0] = compr[parm->method].argv[0];
    new_argv[1] = compr[parm->method].argv[1];
    new_argv[2] = r->filename;
    new_argv[3] = NULL;

    procnew = apr_pcalloc(child_context, sizeof(*procnew));
    rc = apr_proc_create(procnew, compr[parm->method].argv[0],
                           new_argv, NULL, procattr, child_context);
    if (rc != APR_SUCCESS) {
        /* Bad things happened. Everyone should have cleaned up. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(01552)
                      MODNAME ": could not execute `%s'.",
                      compr[parm->method].argv[0]);
        return rc;
    }

    apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT);
    *pipe_in = procnew->out;

    return APR_SUCCESS;
}
2166 
/*
 * uncompress() -- run the decompressor `method' (an index into compr[]) on
 * the request's file and read up to `n' bytes of its output.
 *
 * The output buffer is allocated from r->pool and stored in *newch.  The
 * child process lives in a private sub-pool that is destroyed before
 * returning on every path after its creation, so the child is collected
 * right away.
 *
 * Returns the number of bytes read (> 0), or -1 if the sub-pool or the
 * child could not be created or nothing could be read.
 */
static int uncompress(request_rec *r, int method,
                      unsigned char **newch, apr_size_t n)
{
    struct uncompress_parms parm;
    apr_file_t *pipe_out = NULL;
    apr_pool_t *sub_context;
    apr_status_t rv;

    parm.r = r;
    parm.method = method;

    /* We make a sub_pool so that we can collect our child early, otherwise
     * there are cases (i.e. generating directory indices with mod_autoindex)
     * where we would end up with LOTS of zombies.
     */
    if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS)
        return -1;
    apr_pool_tag(sub_context, "magic_uncompress");

    if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) {
        /* release the sub-pool here too instead of leaving it to r->pool */
        apr_pool_destroy(sub_context);
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01553)
                    MODNAME ": couldn't spawn uncompress process: %s", r->uri);
        return -1;
    }

    *newch = (unsigned char *) apr_palloc(r->pool, n);
    rv = apr_file_read(pipe_out, *newch, &n);

    /* the data now lives in r->pool; the child and its pipes can go */
    apr_pool_destroy(sub_context);

    if (n == 0) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01554)
            MODNAME ": read failed from uncompress of %s", r->filename);
        return -1;
    }
    return n;
}
2203 
2204 /*
2205  * is_tar() -- figure out whether file is a tar archive.
2206  *
2207  * Stolen (by author of file utility) from the public domain tar program: Public
2208  * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
2209  *
2210  * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
2211  * 1997/06/24 00:41:02 ikluft Exp ikluft $
2212  *
2213  * Comments changed and some code/comments reformatted for file command by Ian
2214  * Darwin.
2215  */
2216 
2217 #define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7'))
2218 
2219 /*
2220  * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
2221  * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
2222  */
2223 
is_tar(unsigned char * buf,apr_size_t nbytes)2224 static int is_tar(unsigned char *buf, apr_size_t nbytes)
2225 {
2226     register union record *header = (union record *) buf;
2227     register int i;
2228     register long sum, recsum;
2229     register char *p;
2230 
2231     if (nbytes < sizeof(union record))
2232                return 0;
2233 
2234     recsum = from_oct(8, header->header.chksum);
2235 
2236     sum = 0;
2237     p = header->charptr;
2238     for (i = sizeof(union record); --i >= 0;) {
2239         /*
2240          * We can't use unsigned char here because of old compilers, e.g. V7.
2241          */
2242         sum += 0xFF & *p++;
2243     }
2244 
2245     /* Adjust checksum to count the "chksum" field as blanks. */
2246     for (i = sizeof(header->header.chksum); --i >= 0;)
2247         sum -= 0xFF & header->header.chksum[i];
2248     sum += ' ' * sizeof header->header.chksum;
2249 
2250     if (sum != recsum)
2251         return 0;   /* Not a tar archive */
2252 
2253     if (0 == strcmp(header->header.magic, TMAGIC))
2254         return 2;   /* Unix Standard tar archive */
2255 
2256     return 1;       /* Old fashioned tar archive */
2257 }
2258 
2259 
/*
 * Quick and dirty octal conversion.
 *
 * Parses at most `digs' characters starting at `where': leading white
 * space is skipped, then octal digits are accumulated.
 *
 * Result is -1 if the field is invalid (all blank, or the digits are
 * followed, within the field, by something other than white space or NUL).
 */
static long from_oct(int digs, char *where)
{
    long value = 0;

    /* Skip spaces; running out of field here means it was all blank */
    while (apr_isspace(*where)) {
        ++where;
        --digs;
        if (digs <= 0)
            return -1;
    }

    /* Scan til nonoctal */
    for (; digs > 0 && isodigit(*where); --digs, ++where)
        value = (value << 3) | (*where - '0');

    /* Ended on non-space/nul */
    if (digs > 0 && *where != '\0' && !apr_isspace(*where))
        return -1;

    return value;
}
2285 
2286 /*
2287  * Check for file-revision suffix
2288  *
2289  * This is for an obscure document control system used on an intranet.
2290  * The web representation of each file's revision has an @1, @2, etc
2291  * appended with the revision number.  This needs to be stripped off to
2292  * find the file suffix, which can be recognized by sending the name back
2293  * through a sub-request.  The base file name (without the @num suffix)
2294  * must exist because its type will be used as the result.
2295  */
revision_suffix(request_rec * r)2296 static int revision_suffix(request_rec *r)
2297 {
2298     int suffix_pos, result;
2299     char *sub_filename;
2300     request_rec *sub;
2301 
2302 #if MIME_MAGIC_DEBUG
2303     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01555)
2304                 MODNAME ": revision_suffix checking %s", r->filename);
2305 #endif /* MIME_MAGIC_DEBUG */
2306 
2307     /* check for recognized revision suffix */
2308     suffix_pos = strlen(r->filename) - 1;
2309     if (!apr_isdigit(r->filename[suffix_pos])) {
2310         return 0;
2311     }
2312     while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos]))
2313         suffix_pos--;
2314     if (suffix_pos < 0 || r->filename[suffix_pos] != '@') {
2315         return 0;
2316     }
2317 
2318     /* perform sub-request for the file name without the suffix */
2319     result = 0;
2320     sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos);
2321 #if MIME_MAGIC_DEBUG
2322     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01556)
2323                 MODNAME ": subrequest lookup for %s", sub_filename);
2324 #endif /* MIME_MAGIC_DEBUG */
2325     sub = ap_sub_req_lookup_file(sub_filename, r, NULL);
2326 
2327     /* extract content type/encoding/language from sub-request */
2328     if (sub->content_type) {
2329         ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type));
2330 #if MIME_MAGIC_DEBUG
2331         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01557)
2332                     MODNAME ": subrequest %s got %s",
2333                     sub_filename, r->content_type);
2334 #endif /* MIME_MAGIC_DEBUG */
2335         if (sub->content_encoding)
2336             r->content_encoding =
2337                 apr_pstrdup(r->pool, sub->content_encoding);
2338         if (sub->content_languages) {
2339             int n;
2340             r->content_languages = apr_array_copy(r->pool,
2341                                                   sub->content_languages);
2342             for (n = 0; n < r->content_languages->nelts; ++n) {
2343                 char **lang = ((char **)r->content_languages->elts) + n;
2344                 *lang = apr_pstrdup(r->pool, *lang);
2345             }
2346         }
2347         result = 1;
2348     }
2349 
2350     /* clean up */
2351     ap_destroy_sub_req(sub);
2352 
2353     return result;
2354 }
2355 
2356 /*
2357  * initialize the module
2358  */
magic_init(apr_pool_t * p,apr_pool_t * plog,apr_pool_t * ptemp,server_rec * main_server)2359 static int magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
2360 {
2361     int result;
2362     magic_server_config_rec *conf;
2363     magic_server_config_rec *main_conf;
2364     server_rec *s;
2365 #if MIME_MAGIC_DEBUG
2366     struct magic *m, *prevm;
2367 #endif /* MIME_MAGIC_DEBUG */
2368 
2369     main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module);
2370     for (s = main_server; s; s = s->next) {
2371         conf = ap_get_module_config(s->module_config, &mime_magic_module);
2372         if (conf->magicfile == NULL && s != main_server) {
2373             /* inherits from the parent */
2374             *conf = *main_conf;
2375         }
2376         else if (conf->magicfile) {
2377             result = apprentice(s, p);
2378             if (result == -1)
2379                 return OK;
2380 #if MIME_MAGIC_DEBUG
2381             prevm = 0;
2382             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01558)
2383                         MODNAME ": magic_init 1 test");
2384             for (m = conf->magic; m; m = m->next) {
2385                 if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
2386                     apr_isprint((((unsigned long) m) >> 16) & 255) &&
2387                     apr_isprint((((unsigned long) m) >> 8) & 255) &&
2388                     apr_isprint(((unsigned long) m) & 255)) {
2389                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01559)
2390                                 MODNAME ": magic_init 1: POINTER CLOBBERED! "
2391                                 "m=\"%c%c%c%c\" line=%d",
2392                                 (((unsigned long) m) >> 24) & 255,
2393                                 (((unsigned long) m) >> 16) & 255,
2394                                 (((unsigned long) m) >> 8) & 255,
2395                                 ((unsigned long) m) & 255,
2396                                 prevm ? prevm->lineno : -1);
2397                     break;
2398                 }
2399                 prevm = m;
2400             }
2401 #endif
2402         }
2403     }
2404     return OK;
2405 }
2406 
2407 /*
2408  * Find the Content-Type from any resource this module has available
2409  */
2410 
magic_find_ct(request_rec * r)2411 static int magic_find_ct(request_rec *r)
2412 {
2413     int result;
2414     magic_server_config_rec *conf;
2415 
2416     /* the file has to exist */
2417     if (r->finfo.filetype == APR_NOFILE || !r->filename) {
2418         return DECLINED;
2419     }
2420 
2421     /* was someone else already here? */
2422     if (r->content_type) {
2423         return DECLINED;
2424     }
2425 
2426     conf = ap_get_module_config(r->server->module_config, &mime_magic_module);
2427     if (!conf || !conf->magic) {
2428         return DECLINED;
2429     }
2430 
2431     /* initialize per-request info */
2432     if (!magic_set_config(r)) {
2433         return HTTP_INTERNAL_SERVER_ERROR;
2434     }
2435 
2436     /* try excluding file-revision suffixes */
2437     if (revision_suffix(r) != 1) {
2438         /* process it based on the file contents */
2439         if ((result = magic_process(r)) != OK) {
2440             return result;
2441         }
2442     }
2443 
2444     /* if we have any results, put them in the request structure */
2445     return magic_rsl_to_request(r);
2446 }
2447 
/*
 * register_hooks() -- hook this module into request processing: the type
 * checker is magic_find_ct() and the post-config step is magic_init().
 */
static void register_hooks(apr_pool_t *p)
{
    /* mod_mime_magic should be run after mod_mime, if at all. */
    static const char * const run_after[] = {
        "mod_mime.c",
        NULL
    };

    ap_hook_type_checker(magic_find_ct, run_after, NULL, APR_HOOK_MIDDLE);
    ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST);
}
2457 
/*
 * Apache API module interface
 *
 * Module record for mod_mime_magic.  Only per-server configuration is
 * used; there are no per-directory hooks.
 */

AP_DECLARE_MODULE(mime_magic) =
{
    STANDARD20_MODULE_STUFF,
    NULL,                      /* dir config creator */
    NULL,                      /* dir merger --- default is to override */
    create_magic_server_config,        /* server config creator */
    merge_magic_server_config, /* merge server config */
    mime_magic_cmds,           /* command table */
    register_hooks              /* register hooks */
};
2472