1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2  *@ Implementation of mime-type.h.
3  *@ "Keep in sync with" ../../mime.types.
4  *@ TODO With an on_loop_tick_event, trigger cache update once per loop max.
5  *
6  * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7  * SPDX-License-Identifier: ISC
8  *
9  * Permission to use, copy, modify, and/or distribute this software for any
10  * purpose with or without fee is hereby granted, provided that the above
11  * copyright notice and this permission notice appear in all copies.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20  */
21 #undef su_FILE
22 #define su_FILE mime_type
23 #define mx_SOURCE
24 #define mx_SOURCE_MIME_TYPE
25 
26 #ifndef mx_HAVE_AMALGAMATION
27 # include "mx/nail.h"
28 #endif
29 
30 #include <su/cs.h>
31 #include <su/icodec.h>
32 #include <su/mem.h>
33 
34 #include "mx/file-streams.h"
35 
36 #ifdef mx_HAVE_FILTER_HTML_TAGSOUP
37    /* TODO that this does not belong: clear */
38 # include "mx/filter-html.h"
39 #endif
40 #ifdef mx_HAVE_MAILCAP
41 # include "mx/mailcap.h"
42 #endif
43 
44 #include "mx/mime-type.h"
45 #include "su/code-in.h"
46 
/* Flag word of a MIME type entry.
 * The lowest bits (a_MIMETYPE__TMASK) select the top-level media type, the
 * bits starting at 1u << 8 tell where the entry was defined, and the bits
 * starting at 1u << 16 (a_MIMETYPE__TM_MARKMASK) hold the type-marker */
enum a_mimetype{
   /* Top-level media types; same order as the a_mimetype_names[] table */
   a_MIMETYPE_APPLICATION,
   a_MIMETYPE_AUDIO,
   a_MIMETYPE_IMAGE,
   a_MIMETYPE_MESSAGE,
   a_MIMETYPE_MULTIPART,
   a_MIMETYPE_TEXT,
   a_MIMETYPE_VIDEO,
   /* None of the above: no entry in a_mimetype_names[] */
   a_MIMETYPE_OTHER,
   a_MIMETYPE__TMIN = 0u,
   a_MIMETYPE__TMAX = a_MIMETYPE_OTHER,
   a_MIMETYPE__TMASK = 0x07u,

   /* Origin of the entry */
   a_MIMETYPE_CMD = 1u << 8, /* `mimetype' command (not a built-in) */
   a_MIMETYPE_USR = 1u << 9, /* VAL_MIME_TYPES_USR */
   a_MIMETYPE_SYS = 1u << 10, /* VAL_MIME_TYPES_SYS */
   a_MIMETYPE_FSPEC = 1u << 11, /* f= spec of *mimetypes-load-control* */

   /* Type-marker (a small number, not single bits) */
   a_MIMETYPE_TM_PLAIN = 1u << 16, /* No pipe handler: display as text */
   a_MIMETYPE_TM_SOUP_h = 2u << 16, /* Ditto, but HTML tagsoup parser iff */
   a_MIMETYPE_TM_SOUP_H = 3u << 16, /* HTML tagsoup, else NOT plain text */
   a_MIMETYPE_TM_QUIET = 4u << 16, /* No "no mime handler available" msg */
   a_MIMETYPE__TM_MARKMASK = 7u << 16
};
71 
/* Bits used by the content classifier: some are handed in by the caller as
 * the initial value, the others are collected while data is inspected */
enum a_mimetype_class{
   a_MIMETYPE_C_NONE,
   a_MIMETYPE_C_CLEAN = a_MIMETYPE_C_NONE, /* Plain RFC 5322 message */
   a_MIMETYPE_C_DEEP_INSPECT = 1u << 0, /* Always test all the file */
   a_MIMETYPE_C_NCTT = 1u << 1, /* *content_type == NIL */
   a_MIMETYPE_C_ISTXT = 1u << 2, /* *content_type =~ text\/ */
   a_MIMETYPE_C_ISTXTCOK = 1u << 3, /* _ISTXT + *mime-allow-text-controls* */
   a_MIMETYPE_C_HIGHBIT = 1u << 4, /* Not 7bit clean */
   a_MIMETYPE_C_LONGLINES = 1u << 5, /* MIME_LINELEN_LIMIT exceeded */
   a_MIMETYPE_C_CTRLCHAR = 1u << 6, /* Control characters seen */
   a_MIMETYPE_C_HASNUL = 1u << 7, /* Contains \0 characters */
   a_MIMETYPE_C_NOTERMNL = 1u << 8, /* Lacks a final newline */
   a_MIMETYPE_C_FROM_ = 1u << 9, /* ^From_ seen */
   a_MIMETYPE_C_FROM_1STLINE = 1u << 10, /* From_ line seen */
   a_MIMETYPE_C_SUGGEST_DONE = 1u << 16, /* Inspector suggests parse stop */
   a_MIMETYPE_C__1STLINE = 1u << 17 /* Internal: still within first line */
};
89 
/* Bits of the *mime-counter-evidence* setting */
enum a_mimetype_counter_evidence{
   a_MIMETYPE_CE_NONE,
   /* *mime-counter-evidence* was set */
   a_MIMETYPE_CE_SET = 1u << 0,
   /* application/octet-stream: check, overwrite if possible */
   a_MIMETYPE_CE_BIN_OVWR = 1u << 1,
   /* all: check, overwrite if possible */
   a_MIMETYPE_CE_ALL_OVWR = 1u << 2,
   /* application/octet-stream: classify contents last */
   a_MIMETYPE_CE_BIN_PARSE = 1u << 3
};
97 
98 struct a_mimetype_bltin{
99    u32 mtb_flags;
100    u32 mtb_mtlen;
101    char const *mtb_line;
102 };
103 
104 struct a_mimetype_node{
105    struct a_mimetype_node *mtn_next;
106    u32 mtn_flags;
107    u32 mtn_len; /* Length of MIME type string, rest thereafter */
108    char const *mtn_line;
109 };
110 
111 struct a_mimetype_lookup{
112    char const *mtl_name;
113    uz mtl_nlen;
114    struct a_mimetype_node const *mtl_node;
115    char *mtl_result; /* If requested, autorec_alloc()ed MIME type */
116 };
117 
118 struct a_mimetype_class_arg{
119    char const *mtca_buf;
120    uz mtca_len;
121    sz mtca_curlnlen;
122    /*char mtca_lastc;*/
123    char mtca_c;
124    u8 mtca__dummy[3];
125    enum a_mimetype_class mtca_mtc;
126    u64 mtca_all_len;
127    u64 mtca_all_highbit; /* TODO not yet interpreted */
128    u64 mtca_all_bogus;
129 };
130 
131 static struct a_mimetype_bltin const a_mimetype_bltin[] = {
132 #include "gen-mime-types.h" /* */
133 };
134 
135 static char const a_mimetype_names[][16] = {
136    "application/", "audio/", "image/",
137    "message/", "multipart/", "text/",
138    "video/"
139 };
140 CTAV(a_MIMETYPE_APPLICATION == 0 && a_MIMETYPE_AUDIO == 1 &&
141    a_MIMETYPE_IMAGE == 2 && a_MIMETYPE_MESSAGE == 3 &&
142    a_MIMETYPE_MULTIPART == 4 && a_MIMETYPE_TEXT == 5 &&
143    a_MIMETYPE_VIDEO == 6);
144 
145 /* */
146 static boole a_mimetype_is_init;
147 static struct a_mimetype_node *a_mimetype_list;
148 
149 /* Initialize MIME type list in order */
150 static void a_mimetype_init(void);
151 static boole a_mimetype__load_file(u32 orflags, char const *file, char **line,
152       uz *linesize);
153 
154 /* Create (prepend) a new MIME type; cmdcalled results in a bit more verbosity
155  * for `mimetype' */
156 static struct a_mimetype_node *a_mimetype_create(boole cmdcalled, u32 orflags,
157       char const *line, uz len);
158 
159 /* Try to find MIME type by X (after zeroing mtlp), return NIL if not found;
160  * if with_result >mtl_result will be created upon success for the former */
161 static struct a_mimetype_lookup *a_mimetype_by_filename(
162       struct a_mimetype_lookup *mtlp, char const *name, boole with_result);
163 static struct a_mimetype_lookup *a_mimetype_by_name(
164       struct a_mimetype_lookup *mtlp, char const *name);
165 
166 /* In-depth inspection of raw content: call _round() repeatedly, last time with
167  * a 0 length buffer, finally check .mtca_mtc for result.
168  * No further call is needed if _round() return includes _C_SUGGEST_DONE,
169  * as the resulting classification is unambiguous */
170 SINLINE struct a_mimetype_class_arg *a_mimetype_classify_init(
171       struct a_mimetype_class_arg *mtcap, enum a_mimetype_class initval);
172 static BITENUM_IS(u32,a_mimetype_class) a_mimetype_classify_round(
173       struct a_mimetype_class_arg *mtcap);
174 
175 /* We need an in-depth inspection of an application/octet-stream part */
176 static enum mx_mimetype a_mimetype_classify_o_s_part(u32 mce,
177       struct mimepart *mpp, boole deep_inspect);
178 
179 /* Check whether a *pipe-XY* handler is applicable, and adjust flags according
180  * to the defined trigger characters; upon entry MIMETYPE_HDL_NIL is set, and
181  * that is not changed if mthp does not apply */
182 static BITENUM_IS(u32,mx_mimetype_handler_flags) a_mimetype_pipe_check(
183       struct mx_mimetype_handler *mthp, enum sendaction action);
184 
185 static void
a_mimetype_init(void)186 a_mimetype_init(void){
187    uz linesize;
188    char c, *line;
189    char const *srcs_arr[10], *ccp, **srcs;
190    u32 i, j;
191    struct a_mimetype_node *tail;
192    NYD_IN;
193 
194    /*if(a_mimetype_is_init)
195     *  goto jleave;*/
196 
197    /* Always load our built-ins */
198    for(tail = NIL, i = 0; i < NELEM(a_mimetype_bltin); ++i){
199       struct a_mimetype_bltin const *mtbp;
200       struct a_mimetype_node *mtnp;
201 
202       mtnp = su_ALLOC(sizeof *mtnp);
203       mtbp = &a_mimetype_bltin[i];
204 
205       if(tail != NIL)
206          tail->mtn_next = mtnp;
207       else
208          a_mimetype_list = mtnp;
209       tail = mtnp;
210       mtnp->mtn_next = NIL;
211       mtnp->mtn_flags = mtbp->mtb_flags;
212       mtnp->mtn_len = mtbp->mtb_mtlen;
213       mtnp->mtn_line = mtbp->mtb_line;
214    }
215 
216    /* Decide which files sources have to be loaded */
217    if((ccp = ok_vlook(mimetypes_load_control)) == NIL)
218       ccp = "US";
219    else if(*ccp == '\0')
220       goto jleave;
221 
222    srcs = &srcs_arr[2];
223    srcs[-1] = srcs[-2] = NIL;
224 
225    if(su_cs_find_c(ccp, '=') != NIL){
226       line = savestr(ccp);
227 
228       while((ccp = su_cs_sep_c(&line, ',', TRU1)) != NIL){
229          switch((c = *ccp)){
230          case 'S': case 's':
231             srcs_arr[1] = VAL_MIME_TYPES_SYS;
232             if(0){
233                /* FALLTHRU */
234          case 'U': case 'u':
235                srcs_arr[0] = VAL_MIME_TYPES_USR;
236             }
237             if (ccp[1] != '\0')
238                goto jecontent;
239             break;
240          case 'F': case 'f':
241             if(*++ccp == '=' && *++ccp != '\0'){
242                if(P2UZ(srcs - srcs_arr) < NELEM(srcs_arr))
243                   *srcs++ = ccp;
244                else
245                   n_err(_("*mimetypes-load-control*: too many sources, "
246                         "skipping %s\n"), n_shexp_quote_cp(ccp, FAL0));
247                continue;
248             }
249             /* FALLTHRU */
250          default:
251             goto jecontent;
252          }
253       }
254    }else for(i = 0; (c = ccp[i]) != '\0'; ++i)
255       switch(c){
256       case 'S': case 's': srcs_arr[1] = VAL_MIME_TYPES_SYS; break;
257       case 'U': case 'u': srcs_arr[0] = VAL_MIME_TYPES_USR; break;
258       default:
259 jecontent:
260          n_err(_("*mimetypes-load-control*: unsupported value: %s\n"), ccp);
261          goto jleave;
262       }
263 
264    /* Load all file-based sources in the desired order */
265    mx_fs_linepool_aquire(&line, &linesize);
266    for(j = 0, i = S(u32,P2UZ(srcs - srcs_arr)), srcs = srcs_arr;
267          i > 0; ++j, ++srcs, --i)
268       if(*srcs == NIL)
269          continue;
270       else if(!a_mimetype__load_file((j == 0 ? a_MIMETYPE_USR
271                   : (j == 1 ? a_MIMETYPE_SYS : a_MIMETYPE_FSPEC)),
272                *srcs, &line, &linesize)){
273          s32 eno;
274 
275          if((eno = su_err_no()) != su_ERR_NOENT ||
276                (n_poption & n_PO_D_V) || j > 1)
277             n_err(_("*mimetypes-load-control*: cannot open or load %s: %s\n"),
278                n_shexp_quote_cp(*srcs, FAL0), su_err_doc(eno));
279       }
280    mx_fs_linepool_release(line, linesize);
281 
282 jleave:
283    a_mimetype_is_init = TRU1;
284    NYD_OU;
285 }
286 
287 static boole
a_mimetype__load_file(u32 orflags,char const * file,char ** line,uz * linesize)288 a_mimetype__load_file(u32 orflags, char const *file, char **line,
289       uz *linesize){
290    uz len;
291    struct a_mimetype_node *head, *tail, *mtnp;
292    FILE *fp;
293    char const *cp;
294    NYD_IN;
295 
296    if((cp = fexpand(file, (FEXP_NOPROTO | FEXP_LOCAL_FILE | FEXP_NSHELL))
297          ) == NIL || (fp = mx_fs_open(cp, "r")) == NIL){
298       cp = NIL;
299       goto jleave;
300    }
301 
302    head = tail = NIL;
303 
304    while(fgetline(line, linesize, NIL, &len, fp, FAL0) != NIL)
305       if((mtnp = a_mimetype_create(FAL0, orflags, *line, len)) != NIL){
306          if(head == NIL)
307             head = tail = mtnp;
308          else
309             tail->mtn_next = mtnp;
310          tail = mtnp;
311       }
312 
313    if(ferror(fp))
314       cp = NIL;
315    else if(head != NIL){
316       tail->mtn_next = a_mimetype_list;
317       a_mimetype_list = head;
318    }
319 
320    mx_fs_close(fp);
321 
322 jleave:
323    NYD_OU;
324    return (cp != NIL);
325 }
326 
327 static struct a_mimetype_node *
a_mimetype_create(boole cmdcalled,u32 orflags,char const * line,uz len)328 a_mimetype_create(boole cmdcalled, u32 orflags, char const *line, uz len){
329    uz tlen, i;
330    char const *typ, *subtyp;
331    struct a_mimetype_node *mtnp;
332    NYD_IN;
333 
334    mtnp = NIL;
335 
336    /* Drop anything after a comment first TODO v15: only when read from file */
337    if((typ = su_mem_find(line, '#', len)) != NIL)
338       len = P2UZ(typ - line);
339 
340    /* Then trim any trailing whitespace from line (including NL/CR) */
341    /* C99 */{
342       struct str work;
343 
344       work.s = UNCONST(char*,line);
345       work.l = len;
346       line = n_str_trim(&work, n_STR_TRIM_BOTH)->s;
347       len = work.l;
348    }
349    typ = line;
350 
351    /* (But wait - is there a type marker?) */
352    tlen = len;
353    if(!(orflags & (a_MIMETYPE_USR | a_MIMETYPE_SYS)) &&
354          (*typ == '?' || *typ == '@')){
355       if(*typ == '@') /* v15compat (plus trailing below) */
356          n_OBSOLETE2(_("mimetype: type markers (and much more) use ? not @"),
357             line);
358       if(len < 2)
359          goto jeinval;
360       if(typ[1] == ' '){
361          orflags |= a_MIMETYPE_TM_PLAIN;
362          typ += 2;
363          len -= 2;
364          line += 2;
365       }else if(len > 3){
366          if(typ[2] == ' ')
367             i = 3;
368          else if(len > 4 && (typ[2] == '?' || typ[2] == '@') && typ[3] == ' ')
369             i = 4;
370          else
371             goto jeinval;
372 
373          switch(typ[1]){
374          default: goto jeinval;
375          case 't': orflags |= a_MIMETYPE_TM_PLAIN; break;
376          case 'h': orflags |= a_MIMETYPE_TM_SOUP_h; break;
377          case 'H': orflags |= a_MIMETYPE_TM_SOUP_H; break;
378          case 'q': orflags |= a_MIMETYPE_TM_QUIET; break;
379          }
380          typ += i;
381          len -= i;
382          line += i;
383       }else
384          goto jeinval;
385    }
386 
387    while(len > 0 && !su_cs_is_blank(*line))
388       ++line, --len;
389    /* Ignore empty lines and even incomplete specifications (only MIME type)
390     * because this is quite common in mime.types(5) files */
391    if(len == 0 || (tlen = P2UZ(line - typ)) == 0){
392       if(cmdcalled || (orflags & a_MIMETYPE_FSPEC)){
393          if(len == 0){
394             line = _("(no value)");
395             len = su_cs_len(line);
396          }
397          n_err(_("Empty MIME type or no extensions given: %.*s\n"),
398             S(int,len), line);
399       }
400       goto jleave;
401    }
402 
403    if((subtyp = su_mem_find(typ, '/', tlen)) == NIL || subtyp[1] == '\0' ||
404          su_cs_is_space(subtyp[1])) {
405 jeinval:
406       if(cmdcalled || (orflags & a_MIMETYPE_FSPEC) || (n_poption & n_PO_D_V))
407          n_err(_("%s MIME type: %.*s\n"),
408             (cmdcalled ? _("Invalid") : _("mime.types(5): invalid")),
409             (int)tlen, typ);
410       goto jleave;
411    }
412    ++subtyp;
413 
414    /* Map to mime_type */
415    tlen = P2UZ(subtyp - typ);
416    for(i = a_MIMETYPE__TMIN;;){
417       if(!su_cs_cmp_case_n(a_mimetype_names[i], typ, tlen)){
418          orflags |= i;
419          tlen = P2UZ(line - subtyp);
420          typ = subtyp;
421          break;
422       }
423       if(++i == a_MIMETYPE__TMAX){
424          orflags |= a_MIMETYPE_OTHER;
425          tlen = P2UZ(line - typ);
426          break;
427       }
428    }
429 
430    /* Strip leading whitespace from the list of extensions;
431     * trailing WS has already been trimmed away above.
432     * Be silent on slots which define a mimetype without any value */
433    while(len > 0 && su_cs_is_blank(*line))
434       ++line, --len;
435    if(len == 0)
436       goto jleave;
437 
438    /*  */
439    mtnp = su_ALLOC(sizeof(*mtnp) + tlen + len +1);
440    mtnp->mtn_next = NIL;
441    mtnp->mtn_flags = orflags;
442    mtnp->mtn_len = S(u32,tlen);
443    /* C99 */{
444       char *l;
445 
446       l = S(char*,&mtnp[1]);
447       mtnp->mtn_line = l;
448       su_mem_copy(l, typ, tlen);
449       su_mem_copy(&l[tlen], line, len);
450       l[tlen += len] = '\0';
451    }
452 
453 jleave:
454    NYD_OU;
455    return mtnp;
456 }
457 
458 static struct a_mimetype_lookup *
a_mimetype_by_filename(struct a_mimetype_lookup * mtlp,char const * name,boole with_result)459 a_mimetype_by_filename(struct a_mimetype_lookup *mtlp, char const *name,
460       boole with_result){
461    char const *ext, *cp;
462    struct a_mimetype_node *mtnp;
463    uz nlen, i, j;
464    NYD2_IN;
465 
466    su_mem_set(mtlp, 0, sizeof *mtlp);
467 
468    if((nlen = su_cs_len(name)) == 0) /* TODO name should be a URI */
469       goto jnull_leave;
470    /* We need a period TODO we should support names like README etc. */
471    for(i = nlen; name[--i] != '.';)
472       if(i == 0 || name[i] == '/') /* XXX no magics */
473          goto jnull_leave;
474    /* While here, basename() it */
475    while(i > 0 && name[i - 1] != '/')
476       --i;
477    name += i;
478    nlen -= i;
479    mtlp->mtl_name = name;
480    mtlp->mtl_nlen = nlen;
481 
482    if(!a_mimetype_is_init)
483       a_mimetype_init();
484 
485    /* ..all the MIME types */
486    for(mtnp = a_mimetype_list; mtnp != NIL; mtnp = mtnp->mtn_next){
487       for(ext = &mtnp->mtn_line[mtnp->mtn_len];; ext = cp){
488          cp = ext;
489          while(su_cs_is_space(*cp))
490             ++cp;
491          ext = cp;
492          while(!su_cs_is_space(*cp) && *cp != '\0')
493             ++cp;
494 
495          if((i = P2UZ(cp - ext)) == 0)
496             break;
497          /* Do not allow neither of ".txt" or "txt" to match "txt" */
498          else if(i + 1 >= nlen || name[(j = nlen - i) - 1] != '.' ||
499                su_cs_cmp_case_n(name + j, ext, i))
500             continue;
501 
502          /* Found it */
503          mtlp->mtl_node = mtnp;
504 
505          if(!with_result)
506             goto jleave;
507 
508          if((mtnp->mtn_flags & a_MIMETYPE__TMASK) == a_MIMETYPE_OTHER){
509             name = su_empty;
510             j = 0;
511          }else{
512             name = a_mimetype_names[mtnp->mtn_flags & a_MIMETYPE__TMASK];
513             j = su_cs_len(name);
514          }
515          i = mtnp->mtn_len;
516          mtlp->mtl_result = n_autorec_alloc(i + j +1);
517          if(j > 0)
518             su_mem_copy(mtlp->mtl_result, name, j);
519          su_mem_copy(&mtlp->mtl_result[j], mtnp->mtn_line, i);
520          mtlp->mtl_result[j += i] = '\0';
521          goto jleave;
522       }
523    }
524 
525 jnull_leave:
526    mtlp = NIL;
527 jleave:
528    NYD2_OU;
529    return mtlp;
530 }
531 
532 static struct a_mimetype_lookup *
a_mimetype_by_name(struct a_mimetype_lookup * mtlp,char const * name)533 a_mimetype_by_name(struct a_mimetype_lookup *mtlp, char const *name){
534    uz nlen, i, j;
535    char const *cp;
536    struct a_mimetype_node *mtnp;
537    NYD2_IN;
538 
539    su_mem_set(mtlp, 0, sizeof *mtlp);
540 
541    if((mtlp->mtl_nlen = nlen = su_cs_len(mtlp->mtl_name = name)) == 0)
542       goto jnull_leave;
543 
544    if(!a_mimetype_is_init)
545       a_mimetype_init();
546 
547    /* ..all the MIME types */
548    for(mtnp = a_mimetype_list; mtnp != NIL; mtnp = mtnp->mtn_next){
549       if((mtnp->mtn_flags & a_MIMETYPE__TMASK) == a_MIMETYPE_OTHER){
550          cp = su_empty;
551          j = 0;
552       }else{
553          cp = a_mimetype_names[mtnp->mtn_flags & a_MIMETYPE__TMASK];
554          j = su_cs_len(cp);
555       }
556       i = mtnp->mtn_len;
557 
558       if(i + j == mtlp->mtl_nlen){
559          char *xmt;
560 
561          xmt = n_lofi_alloc(i + j +1);
562          if(j > 0)
563             su_mem_copy(xmt, cp, j);
564          su_mem_copy(&xmt[j], mtnp->mtn_line, i);
565          xmt[j += i] = '\0';
566          i = su_cs_cmp_case(name, xmt);
567          n_lofi_free(xmt);
568 
569          /* Found it? */
570          if(!i){
571             mtlp->mtl_node = mtnp;
572             goto jleave;
573          }
574       }
575    }
576 
577 jnull_leave:
578    mtlp = NIL;
579 jleave:
580    NYD2_OU;
581    return mtlp;
582 }
583 
584 SINLINE struct a_mimetype_class_arg *
a_mimetype_classify_init(struct a_mimetype_class_arg * mtcap,enum a_mimetype_class initval)585 a_mimetype_classify_init(struct a_mimetype_class_arg * mtcap,
586       enum a_mimetype_class initval){
587    NYD2_IN;
588    su_mem_set(mtcap, 0, sizeof *mtcap);
589    /*mtcap->mtca_lastc =*/ mtcap->mtca_c = EOF;
590    mtcap->mtca_mtc = initval | a_MIMETYPE_C__1STLINE;
591    NYD2_OU;
592    return mtcap;
593 }
594 
BITENUM_IS(u32,a_mimetype_class)595 static BITENUM_IS(u32,a_mimetype_class)
596 a_mimetype_classify_round(struct a_mimetype_class_arg *mtcap){
597    /* TODO classify_round: dig UTF-8 for !text/!! */
598    /* TODO BTW., after the MIME/send layer rewrite we could use a MIME
599     * TODO boundary of "=-=-=" if we would add a B_ in EQ spirit to F_,
600     * TODO and report that state to the outer world */
601 #define a_F_ "From "
602 #define a_F_SIZEOF (sizeof(a_F_) -1)
603 
604    char f_buf[a_F_SIZEOF], *f_p = f_buf;
605    BITENUM_IS(u32, a_mimetype_class) mtc;
606    int c, lastc;
607    s64 alllen;
608    sz curlnlen;
609    uz blen;
610    char const *buf;
611    NYD2_IN;
612 
613    buf = mtcap->mtca_buf;
614    blen = mtcap->mtca_len;
615    curlnlen = mtcap->mtca_curlnlen;
616    alllen = mtcap->mtca_all_len;
617    c = mtcap->mtca_c;
618    /*lastc = mtcap->mtca_lastc;*/
619    mtc = mtcap->mtca_mtc;
620 
621    for(;; ++curlnlen){
622       if(blen == 0){
623          /* Real EOF, or only current buffer end? */
624          if(mtcap->mtca_len == 0){
625             lastc = c;
626             c = EOF;
627          }else{
628             lastc = EOF;
629             break;
630          }
631       }else{
632          ++alllen;
633          lastc = c;
634          c = S(uc,*buf++);
635       }
636       --blen;
637 
638       if(c == '\0'){
639          mtc |= a_MIMETYPE_C_HASNUL;
640          if(!(mtc & a_MIMETYPE_C_ISTXTCOK)){
641             mtc |= a_MIMETYPE_C_SUGGEST_DONE;
642             break;
643          }
644          continue;
645       }
646       if(c == '\n' || c == EOF){
647          mtc &= ~a_MIMETYPE_C__1STLINE;
648          if(curlnlen >= MIME_LINELEN_LIMIT)
649             mtc |= a_MIMETYPE_C_LONGLINES;
650          if(c == EOF)
651             break;
652          f_p = f_buf;
653          curlnlen = -1;
654          continue;
655       }
656       /* A bit hairy is handling of \r=\x0D=CR.
657        * RFC 2045, 6.7:
658        * Control characters other than TAB, or CR and LF as parts of CRLF
659        * pairs, must not appear.  \r alone does not force _CTRLCHAR below since
660        * we cannot peek the next character.  Thus right here, inspect the last
661        * seen character for if its \r and set _CTRLCHAR in a delayed fashion */
662        /*else*/ if(lastc == '\r')
663          mtc |= a_MIMETYPE_C_CTRLCHAR;
664 
665       /* Control character? XXX this is all ASCII here */
666       if(c < 0x20 || c == 0x7F){
667          /* RFC 2045, 6.7, as above ... */
668          if(c != '\t' && c != '\r')
669             mtc |= a_MIMETYPE_C_CTRLCHAR;
670 
671          /* If there is a escape sequence in reverse solidus notation defined
672           * for this in ANSI X3.159-1989 (ANSI C89), do not treat it as
673           * a control for real.  I.e., \a=\x07=BEL, \b=\x08=BS, \t=\x09=HT.
674           * Do not follow libmagic(1) in respect to \v=\x0B=VT.  \f=\x0C=NP; do
675           * ignore \e=\x1B=ESC */
676          if((c >= '\x07' && c <= '\x0D') || c == '\x1B')
677             continue;
678 
679          /* As a special case, if we are going for displaying data to the user
680           * or quoting a message then simply continue this, in the end, in case
681           * we get there, we will decide upon the all_len/all_bogus ratio
682           * whether this is usable plain text or not */
683          ++mtcap->mtca_all_bogus;
684          if(mtc & a_MIMETYPE_C_DEEP_INSPECT)
685             continue;
686 
687          mtc |= a_MIMETYPE_C_HASNUL; /* Force base64 */
688          if(!(mtc & a_MIMETYPE_C_ISTXTCOK)){
689             mtc |= a_MIMETYPE_C_SUGGEST_DONE;
690             break;
691          }
692       }else if(S(u8,c) & 0x80){
693          mtc |= a_MIMETYPE_C_HIGHBIT;
694          ++mtcap->mtca_all_highbit;
695          if(!(mtc & (a_MIMETYPE_C_NCTT | a_MIMETYPE_C_ISTXT))){/* TODO _NCTT?*/
696             mtc |= a_MIMETYPE_C_HASNUL /*base64*/ | a_MIMETYPE_C_SUGGEST_DONE;
697             break;
698          }
699       }else if(!(mtc & a_MIMETYPE_C_FROM_) &&
700             UCMP(z, curlnlen, <, a_F_SIZEOF)){
701          *f_p++ = S(char,c);
702          if(UCMP(z, curlnlen, ==, a_F_SIZEOF - 1) &&
703                P2UZ(f_p - f_buf) == a_F_SIZEOF &&
704                !su_mem_cmp(f_buf, a_F_, a_F_SIZEOF)){
705             mtc |= a_MIMETYPE_C_FROM_;
706             if(mtc & a_MIMETYPE_C__1STLINE)
707                mtc |= a_MIMETYPE_C_FROM_1STLINE;
708          }
709       }
710    }
711    if(c == EOF && lastc != '\n')
712       mtc |= a_MIMETYPE_C_NOTERMNL;
713 
714    mtcap->mtca_curlnlen = curlnlen;
715    /*mtcap->mtca_lastc = lastc*/;
716    mtcap->mtca_c = c;
717    mtcap->mtca_mtc = mtc;
718    mtcap->mtca_all_len = alllen;
719    NYD2_OU;
720    return mtc;
721 
722 #undef a_F_
723 #undef a_F_SIZEOF
724 }
725 
726 static enum mx_mimetype
a_mimetype_classify_o_s_part(u32 mce,struct mimepart * mpp,boole deep_inspect)727 a_mimetype_classify_o_s_part(u32 mce, struct mimepart *mpp,
728       boole deep_inspect){
729    struct str in = {NIL, 0}, outrest, inrest, dec;
730    struct a_mimetype_class_arg mtca;
731    int lc, c;
732    uz cnt, lsz;
733    FILE *ibuf;
734    long start_off;
735    boole did_inrest;
736    enum a_mimetype_class mtc;
737    enum mx_mimetype mt;
738    NYD2_IN;
739 
740    ASSERT(mpp->m_mime_enc != MIMEE_BIN);
741 
742    outrest = inrest = dec = in;
743    mt = mx_MIMETYPE_UNKNOWN;
744    mtc = a_MIMETYPE_C_NONE;
745    did_inrest = FAL0;
746 
747    /* TODO v15-compat Note we actually bypass our usual file handling by
748     * TODO directly using fseek() on mb.mb_itf -- the v15 rewrite will change
749     * TODO all of this, and until then doing it like this is the only option
750     * TODO to integrate nicely into whoever calls us */
751    if((start_off = ftell(mb.mb_itf)) == -1)
752       goto jleave;
753    if((ibuf = setinput(&mb, R(struct message*,mpp), NEED_BODY)) == NIL){
754 jos_leave:
755       (void)fseek(mb.mb_itf, start_off, SEEK_SET);
756       goto jleave;
757    }
758    cnt = mpp->m_size;
759 
760    /* Skip part headers */
761    for(lc = '\0'; cnt > 0; lc = c, --cnt)
762       if((c = getc(ibuf)) == EOF || (c == '\n' && lc == '\n'))
763          break;
764    if(cnt == 0 || ferror(ibuf))
765       goto jos_leave;
766 
767    /* So now let's inspect the part content, decoding content-transfer-encoding
768     * along the way TODO this should simply be "mime_factory_create(MPP)"!
769     * TODO In fact m_mime_classifier_(setup|call|call_part|finalize)() and the
770     * TODO state(s) should become reported to the outer
771     * TODO world like that (see MIME boundary TODO around here) */
772    a_mimetype_classify_init(&mtca, (a_MIMETYPE_C_ISTXT |
773       (deep_inspect ? a_MIMETYPE_C_DEEP_INSPECT : a_MIMETYPE_C_NONE)));
774 
775    for(lsz = 0;;){
776       boole dobuf;
777 
778       c = (--cnt == 0) ? EOF : getc(ibuf);
779       if((dobuf = (c == '\n'))){
780          /* Ignore empty lines */
781          if(lsz == 0)
782             continue;
783       }else if((dobuf = (c == EOF))){
784          if(lsz == 0 && outrest.l == 0)
785             break;
786       }
787 
788       if(in.l + 1 >= lsz)
789          in.s = su_REALLOC(in.s, lsz += LINESIZE);
790       if(c != EOF)
791          in.s[in.l++] = S(char,c);
792       if(!dobuf)
793          continue;
794 
795 jdobuf:
796       switch(mpp->m_mime_enc){
797       case MIMEE_B64:
798          if(!b64_decode_part(&dec, &in, &outrest,
799                (did_inrest ? NIL : &inrest))) {
800             mtca.mtca_mtc = a_MIMETYPE_C_HASNUL;
801             goto jstopit; /* break;break; */
802          }
803          break;
804       case MIMEE_QP:
805          /* Drin */
806          if(!qp_decode_part(&dec, &in, &outrest, &inrest)){
807             mtca.mtca_mtc = a_MIMETYPE_C_HASNUL;
808             goto jstopit; /* break;break; */
809          }
810          if(dec.l == 0 && c != EOF){
811             in.l = 0;
812             continue;
813          }
814          break;
815       default:
816          /* Temporarily switch those two buffers.. */
817          dec = in;
818          in.s = NIL;
819          in.l = 0;
820          break;
821       }
822 
823       mtca.mtca_buf = dec.s;
824       mtca.mtca_len = (sz)dec.l;
825       if((mtc = a_mimetype_classify_round(&mtca)) & a_MIMETYPE_C_SUGGEST_DONE){
826          mtc = a_MIMETYPE_C_HASNUL;
827          break;
828       }
829 
830       if(c == EOF)
831          break;
832       /* ..and restore switched */
833       if(in.s == NIL){
834          in = dec;
835          dec.s = NIL;
836       }
837       in.l = dec.l = 0;
838    }
839 
840    if((in.l = inrest.l) > 0){
841       in.s = inrest.s;
842       inrest.s = NIL;
843       did_inrest = TRU1;
844       goto jdobuf;
845    }
846    if(outrest.l > 0)
847       goto jdobuf;
848 
849 jstopit:
850    if(in.s != NIL)
851       su_FREE(in.s);
852    if(dec.s != NIL)
853       su_FREE(dec.s);
854    if(outrest.s != NIL)
855       su_FREE(outrest.s);
856    if(inrest.s != NIL)
857       su_FREE(inrest.s);
858 
859    /* Restore file position to what caller expects (sic) */
860    fseek(mb.mb_itf, start_off, SEEK_SET);
861 
862    if(!(mtc & (a_MIMETYPE_C_HASNUL /*| a_MIMETYPE_C_CTRLCHAR XXX really? */))){
863       /* In that special relaxed case we may very well wave through
864        * octet-streams full of control characters, as they do no harm
865        * TODO This should be part of m_mime_classifier_finalize() then! */
866       if(deep_inspect &&
867             mtca.mtca_all_len - mtca.mtca_all_bogus < mtca.mtca_all_len >> 2)
868          goto jleave;
869 
870       mt = mx_MIMETYPE_TEXT_PLAIN;
871       if(mce & a_MIMETYPE_CE_ALL_OVWR)
872          mpp->m_ct_type_plain = "text/plain";
873       if(mce & (a_MIMETYPE_CE_BIN_OVWR | a_MIMETYPE_CE_ALL_OVWR))
874          mpp->m_ct_type_usr_ovwr = "text/plain";
875    }
876 
877 jleave:
878    NYD2_OU;
879    return mt;
880 }
881 
BITENUM_IS(u32,mx_mimetype_handler_flags)882 static BITENUM_IS(u32,mx_mimetype_handler_flags)
883 a_mimetype_pipe_check(struct mx_mimetype_handler *mthp,
884       enum sendaction action){
885    char const *cp;
886    BITENUM_IS(u32,mx_mimetype_handler_flags) rv_orig, rv;
887    NYD2_IN;
888 
889    rv_orig = rv = mthp->mth_flags;
890    ASSERT((rv & mx_MIMETYPE_HDL_TYPE_MASK) == mx_MIMETYPE_HDL_NIL);
891 
892    /* Do we have any handler for this part? */
893    if(*(cp = mthp->mth_shell_cmd) == '\0')
894       goto jleave;
895    else if(*cp++ != '?' && cp[-1] != '@'/* v15compat */){
896       rv |= mx_MIMETYPE_HDL_CMD;
897       goto jleave;
898    }else{
899       if(cp[-1] == '@')/* v15compat */
900          n_OBSOLETE2(_("*pipe-TYPE/SUBTYPE*+': type markers (and much more) "
901             "use ? not @"), mthp->mth_shell_cmd);
902       if(*cp == '\0'){
903          rv |= mx_MIMETYPE_HDL_TEXT | mx_MIMETYPE_HDL_COPIOUSOUTPUT;
904          goto jleave;
905       }
906    }
907 
908 jnextc:
909    switch(*cp){
910    case '*': rv |= mx_MIMETYPE_HDL_COPIOUSOUTPUT; ++cp; goto jnextc;
911    case '#': rv |= mx_MIMETYPE_HDL_NOQUOTE; ++cp; goto jnextc;
912    case '&': rv |= mx_MIMETYPE_HDL_ASYNC; ++cp; goto jnextc;
913    case '!': rv |= mx_MIMETYPE_HDL_NEEDSTERM; ++cp; goto jnextc;
914    case '+':
915       if(rv & mx_MIMETYPE_HDL_TMPF)
916          rv |= mx_MIMETYPE_HDL_TMPF_UNLINK;
917       rv |= mx_MIMETYPE_HDL_TMPF;
918       ++cp;
919       goto jnextc;
920    case '=':
921       rv |= mx_MIMETYPE_HDL_TMPF_FILL;
922       ++cp;
923       goto jnextc;
924 
925    case 't':
926       switch(rv & mx_MIMETYPE_HDL_TYPE_MASK){
927       case mx_MIMETYPE_HDL_NIL: /* FALLTHRU */
928       case mx_MIMETYPE_HDL_TEXT: break;
929       default:
930          cp = N_("only one type-marker can be used");
931          goto jerrlog;
932       }
933       rv |= mx_MIMETYPE_HDL_TEXT | mx_MIMETYPE_HDL_COPIOUSOUTPUT;
934       ++cp;
935       goto jnextc;
936    case 'h':
937       switch(rv & mx_MIMETYPE_HDL_TYPE_MASK){
938       case mx_MIMETYPE_HDL_NIL: /* FALLTHRU */
939       case mx_MIMETYPE_HDL_PTF: break;
940       default:
941          cp = N_("only one type-marker can be used");
942          goto jerrlog;
943       }
944 #ifdef mx_HAVE_FILTER_HTML_TAGSOUP
945       mthp->mth_ptf = &mx_flthtml_process_main;
946       mthp->mth_msg.l = su_cs_len(mthp->mth_msg.s =
947             UNCONST(char*,_("Built-in HTML tagsoup filter")));
948       rv |= mx_MIMETYPE_HDL_PTF | mx_MIMETYPE_HDL_COPIOUSOUTPUT;
949       ++cp;
950       goto jnextc;
951 #else
952       cp = N_("?h type-marker unsupported (HTML tagsoup filter not built-in)");
953       goto jerrlog;
954 #endif
955 
956    case '@':/* v15compat */
957       /* FALLTHRU */
958    case '?': /* End of flags */
959       ++cp;
960       /* FALLTHRU */
961    default:
962       break;
963    }
964    mthp->mth_shell_cmd = cp;
965 
966    /* Implications */
967    if(rv & mx_MIMETYPE_HDL_TMPF_FILL)
968       rv |= mx_MIMETYPE_HDL_TMPF;
969 
970    /* Exceptions */
971    if(action == SEND_QUOTE || action == SEND_QUOTE_ALL){
972       if(rv & mx_MIMETYPE_HDL_NOQUOTE)
973          goto jerr;
974       /* Cannot fetch data back from asynchronous process */
975       if(rv & mx_MIMETYPE_HDL_ASYNC)
976          goto jerr;
977       if(rv & mx_MIMETYPE_HDL_NEEDSTERM) /* XXX for now */
978          goto jerr;
979       /* xxx Need copiousoutput, and nothing else (for now) */
980       if(!(rv & mx_MIMETYPE_HDL_COPIOUSOUTPUT))
981          goto jerr;
982    }
983 
984    if(rv & mx_MIMETYPE_HDL_NEEDSTERM){
985       if(rv & mx_MIMETYPE_HDL_COPIOUSOUTPUT){
986          cp = N_("cannot use needsterminal and copiousoutput");
987          goto jerr;
988       }
989       if(rv & mx_MIMETYPE_HDL_ASYNC){
990          cp = N_("cannot use needsterminal and x-mailx-async");
991          goto jerr;
992       }
993       /* needsterminal needs a terminal */
994       if(!(n_psonce & n_PSO_INTERACTIVE))
995          goto jerr;
996    }
997 
998    if(rv & mx_MIMETYPE_HDL_ASYNC){
999       if(rv & mx_MIMETYPE_HDL_COPIOUSOUTPUT){
1000          cp = N_("cannot use x-mailx-async and copiousoutput");
1001          goto jerrlog;
1002       }
1003       if(rv & mx_MIMETYPE_HDL_TMPF_UNLINK){
1004          cp = N_("cannot use x-mailx-async and x-mailx-tmpfile-unlink");
1005          goto jerrlog;
1006       }
1007    }
1008 
1009    if((rv & mx_MIMETYPE_HDL_TYPE_MASK) != mx_MIMETYPE_HDL_NIL){
1010       if(rv & ~(mx_MIMETYPE_HDL_TYPE_MASK | mx_MIMETYPE_HDL_COPIOUSOUTPUT |
1011             mx_MIMETYPE_HDL_NOQUOTE)){
1012          cp = N_("?[th] type-markers only support flags * and #");
1013          goto jerrlog;
1014       }
1015    }else
1016       rv |= mx_MIMETYPE_HDL_CMD;
1017 
1018 jleave:
1019    mthp->mth_flags = rv;
1020    NYD2_OU;
1021    return rv;
1022 
1023 jerrlog:
1024    n_err(_("MIME type handlers: %s\n"), V_(cp));
1025 jerr:
1026    rv = rv_orig;
1027    goto jleave;
1028 
1029 }
1030 
1031 int
c_mimetype(void * vp)1032 c_mimetype(void *vp){
1033    struct n_string s_b, *s;
1034    struct a_mimetype_node *mtnp;
1035    char **argv;
1036    NYD_IN;
1037 
1038    if(!a_mimetype_is_init)
1039       a_mimetype_init();
1040 
1041    s = n_string_creat_auto(&s_b);
1042 
1043    if(*(argv = vp) == NIL){
1044       FILE *fp;
1045       uz l;
1046 
1047       if(a_mimetype_list == NIL){
1048          fprintf(n_stdout,
1049             _("# `mimetype': no mime.type(5) data available\n"));
1050          goto jleave;
1051       }
1052 
1053       if((fp = mx_fs_tmp_open("mimetype", (mx_FS_O_RDWR | mx_FS_O_UNLINK |
1054                mx_FS_O_REGISTER), NIL)) == NIL){
1055          n_perr(_("tmpfile"), 0);
1056          fp = n_stdout;
1057       }
1058 
1059       s = n_string_reserve(s, 63);
1060 
1061       for(l = 0, mtnp = a_mimetype_list; mtnp != NIL;
1062             ++l, mtnp = mtnp->mtn_next){
1063          char const *cp;
1064 
1065          s = n_string_trunc(s, 0);
1066 
1067          switch(mtnp->mtn_flags & a_MIMETYPE__TM_MARKMASK){
1068          case a_MIMETYPE_TM_PLAIN: cp = "?t "; break;
1069          case a_MIMETYPE_TM_SOUP_h: cp = "?h "; break;
1070          case a_MIMETYPE_TM_SOUP_H: cp = "?H "; break;
1071          case a_MIMETYPE_TM_QUIET: cp = "?q "; break;
1072          default: cp = NIL; break;
1073          }
1074          if(cp != NIL)
1075             s = n_string_push_cp(s, cp);
1076 
1077          if((mtnp->mtn_flags & a_MIMETYPE__TMASK) != a_MIMETYPE_OTHER)
1078             s = n_string_push_cp(s,
1079                   a_mimetype_names[mtnp->mtn_flags &a_MIMETYPE__TMASK]);
1080 
1081          s = n_string_push_buf(s, mtnp->mtn_line, mtnp->mtn_len);
1082          s = n_string_push_c(s, ' ');
1083          s = n_string_push_c(s, ' ');
1084          s = n_string_push_cp(s, &mtnp->mtn_line[mtnp->mtn_len]);
1085 
1086          fprintf(fp, "mimetype %s%s\n", n_string_cp(s),
1087             ((n_poption & n_PO_D_V) == 0 ? su_empty
1088                : (mtnp->mtn_flags & a_MIMETYPE_USR ? " # user"
1089                : (mtnp->mtn_flags & a_MIMETYPE_SYS ? " # system"
1090                : (mtnp->mtn_flags & a_MIMETYPE_FSPEC ? " # f= file"
1091                : (mtnp->mtn_flags & a_MIMETYPE_CMD ? " # command"
1092                : " # built-in"))))));
1093        }
1094 
1095       if(fp != n_stdout){
1096          page_or_print(fp, l);
1097 
1098          mx_fs_close(fp);
1099       }else
1100          clearerr(fp);
1101    }else{
1102       for(; *argv != NIL; ++argv){
1103          if(s->s_len > 0)
1104             s = n_string_push_c(s, ' ');
1105          s = n_string_push_cp(s, *argv);
1106       }
1107 
1108       mtnp = a_mimetype_create(TRU1, a_MIMETYPE_CMD, n_string_cp(s), s->s_len);
1109       if(mtnp != NIL){
1110          mtnp->mtn_next = a_mimetype_list;
1111          a_mimetype_list = mtnp;
1112       }else
1113          vp = NIL;
1114    }
1115 
1116 jleave:
1117    NYD_OU;
1118    return (vp == NIL ? n_EXIT_ERR : n_EXIT_OK);
1119 }
1120 
1121 int
c_unmimetype(void * vp)1122 c_unmimetype(void *vp){
1123    boole match;
1124    struct a_mimetype_node *lnp, *mtnp;
1125    char **argv;
1126    NYD_IN;
1127 
1128    argv = vp;
1129 
1130    /* Need to load that first as necessary */
1131    if(!a_mimetype_is_init)
1132       a_mimetype_init();
1133 
1134    for(; *argv != NIL; ++argv){
1135       if(!su_cs_cmp_case(*argv, "reset")){
1136          a_mimetype_is_init = FAL0;
1137          goto jdelall;
1138       }
1139 
1140       if(argv[0][0] == '*' && argv[0][1] == '\0'){
1141 jdelall:
1142          while((mtnp = a_mimetype_list) != NIL){
1143             a_mimetype_list = mtnp->mtn_next;
1144             su_FREE(mtnp);
1145          }
1146          continue;
1147       }
1148 
1149       for(match = FAL0, lnp = NIL, mtnp = a_mimetype_list; mtnp != NIL;){
1150          char *val;
1151          uz i;
1152          char const *typ;
1153 
1154          if((mtnp->mtn_flags & a_MIMETYPE__TMASK) == a_MIMETYPE_OTHER){
1155             typ = su_empty;
1156             i = 0;
1157          }else{
1158             typ = a_mimetype_names[mtnp->mtn_flags & a_MIMETYPE__TMASK];
1159             i = su_cs_len(typ);
1160          }
1161 
1162          val = n_lofi_alloc(i + mtnp->mtn_len +1);
1163          su_mem_copy(val, typ, i);
1164          su_mem_copy(&val[i], mtnp->mtn_line, mtnp->mtn_len);
1165          val[i += mtnp->mtn_len] = '\0';
1166          i = su_cs_cmp_case(val, *argv);
1167          n_lofi_free(val);
1168 
1169          if(!i){
1170             struct a_mimetype_node *nnp;
1171 
1172             nnp = mtnp->mtn_next;
1173             if(lnp == NIL)
1174                a_mimetype_list = nnp;
1175             else
1176                lnp->mtn_next = nnp;
1177             su_FREE(mtnp);
1178             mtnp = nnp;
1179             match = TRU1;
1180          }else
1181             lnp = mtnp, mtnp = mtnp->mtn_next;
1182       }
1183 
1184       if(!match){
1185          if(!(n_pstate & n_PS_ROBOT) || (n_poption & n_PO_D_V))
1186             n_err(_("No such MIME type: %s\n"), n_shexp_quote_cp(*argv, FAL0));
1187          vp = NIL;
1188       }
1189    }
1190 
1191    NYD_OU;
1192    return (vp == NIL ? n_EXIT_ERR : n_EXIT_OK);
1193 }
1194 
1195 boole
mx_mimetype_is_valid(char const * name,boole t_a_subt,boole subt_wildcard_ok)1196 mx_mimetype_is_valid(char const *name, boole t_a_subt, boole subt_wildcard_ok){
1197    char c;
1198    NYD2_IN;
1199 
1200    if(t_a_subt)
1201       t_a_subt = TRU1;
1202 
1203    while((c = *name++) != '\0'){
1204       /* RFC 4288, section 4.2 */
1205       if(su_cs_is_alnum(c) || c == '!' ||
1206             c == '#' || c == '$' || c == '&' || c == '.' ||
1207             c == '+' || c == '-' || c == '^' || c == '_')
1208          continue;
1209 
1210       if(c == '/'){
1211          if(t_a_subt != TRU1)
1212             break;
1213          t_a_subt = TRUM1;
1214          continue;
1215       }
1216 
1217       if(c == '*' && t_a_subt == TRUM1 && subt_wildcard_ok)
1218          /* Must be last character, then */
1219          c = *name;
1220       break;
1221    }
1222 
1223    NYD2_OU;
1224    return (c == '\0');
1225 }
1226 
1227 boole
mx_mimetype_is_known(char const * name)1228 mx_mimetype_is_known(char const *name){
1229    struct a_mimetype_lookup mtl;
1230    boole rv;
1231    NYD_IN;
1232 
1233    rv = (a_mimetype_by_name(&mtl, name) != NIL);
1234    NYD_OU;
1235    return rv;
1236 }
1237 
1238 char *
mx_mimetype_classify_filename(char const * name)1239 mx_mimetype_classify_filename(char const *name){
1240    struct a_mimetype_lookup mtl;
1241    NYD_IN;
1242 
1243    a_mimetype_by_filename(&mtl, name, TRU1);
1244    NYD_OU;
1245    return mtl.mtl_result;
1246 }
1247 
/* Inspect the content of fp in order to choose a transfer encoding for it,
 * which is returned in the form of an enum conversion.
 * fp must be positioned at offset 0 (asserted), and is rewound after its
 * content has been read.
 * On entry *content_type may be NIL ("not known"), in which case it is set to
 * "text/plain", or, if NUL bytes are seen, to "application/octet-stream";
 * the latter also replaces text types that contain NUL bytes unless
 * *mime-allow-text-controls* is set.
 * *charset is only assigned if it is NIL, and not at all on the path that
 * forces application/octet-stream (or leaves a non-text type alone) because of
 * NUL bytes.
 * *do_iconv is set to whether character set conversion is desired.
 * no_mboxo, if set, forces quoted-printable even if the only trouble with the
 * data is a line that looks like a From_ line. */
enum conversion
mx_mimetype_classify_file(FILE *fp, char const **content_type,
      char const **charset, boole *do_iconv, boole no_mboxo){
   /* TODO classify once only PLEASE PLEASE PLEASE */
   /* TODO message/rfc822 is special in that it may only be 7bit, 8bit or
    * TODO binary according to RFC 2046, 5.2.1
    * TODO The handling of which is a hack */
   enum conversion c;
   off_t fpsz;
   enum mime_enc menc;
   boole rfc822;
   enum a_mimetype_class mtc;
   NYD_IN;

   ASSERT(ftell(fp) == 0x0l);

   *do_iconv = FAL0;

   /* Initial classification state according to the (claimed) content type */
   if(*content_type == NIL){
      mtc = a_MIMETYPE_C_NCTT;
      rfc822 = FAL0;
   }else if(!su_cs_cmp_case_n(*content_type, "text/", 5)){
      mtc = ok_blook(mime_allow_text_controls)
            ? a_MIMETYPE_C_ISTXT | a_MIMETYPE_C_ISTXTCOK : a_MIMETYPE_C_ISTXT;
      rfc822 = FAL0;
   }else if(!su_cs_cmp_case(*content_type, "message/rfc822")){
      mtc = a_MIMETYPE_C_ISTXT;
      rfc822 = TRU1;
   }else{
      mtc = a_MIMETYPE_C_CLEAN;
      rfc822 = FAL0;
   }

   menc = mime_enc_target();

   /* An empty file needs no inspection, it is 7bit */
   if((fpsz = fsize(fp)) == 0)
      goto j7bit;
   else{
      struct a_mimetype_class_arg mtca;
      char *buf;

      /* Feed the file in chunks of BUFFER_SIZE to the classifier, until it
       * suggests to stop, or until a read of zero bytes has been passed to it
       * (NOTE(review): presumably that signals end of data, confirm against
       * a_mimetype_classify_round()) */
      a_mimetype_classify_init(&mtca, mtc);
      buf = n_lofi_alloc(BUFFER_SIZE);
      for(;;){
         mtca.mtca_len = fread(buf, sizeof(buf[0]), BUFFER_SIZE, fp);
         mtca.mtca_buf = buf;
         if((mtc = a_mimetype_classify_round(&mtca)
               ) & a_MIMETYPE_C_SUGGEST_DONE)
            break;
         if(mtca.mtca_len == 0)
            break;
      }
      n_lofi_free(buf);

      /* TODO ferror(fp) ! */
      rewind(fp);
   }

   /* NUL bytes enforce base64 */
   if(mtc & a_MIMETYPE_C_HASNUL){
      menc = MIMEE_B64;
      /* XXX Do not overwrite text content-type to allow UTF-16 and such, but
       * XXX only on request; otherwise enforce what file(1)/libmagic(3) would
       * XXX suggest */
      if(mtc & a_MIMETYPE_C_ISTXTCOK)
         goto jcharset;
      if(mtc & (a_MIMETYPE_C_NCTT | a_MIMETYPE_C_ISTXT))
         *content_type = "application/octet-stream";
      goto jleave;
   }

   /* Data which needs encoding unless the target encoding already is base64
    * or quoted-printable */
   if(mtc & (a_MIMETYPE_C_LONGLINES | a_MIMETYPE_C_CTRLCHAR |
         a_MIMETYPE_C_NOTERMNL | a_MIMETYPE_C_FROM_)){
      if(menc != MIMEE_B64 && menc != MIMEE_QP){
         /* If the user chooses 8bit, and we do not privacy-sign the message,
          * then if encoding would be enforced only because of a ^From_, no */
         if((mtc & (a_MIMETYPE_C_LONGLINES | a_MIMETYPE_C_CTRLCHAR |
                  a_MIMETYPE_C_NOTERMNL | a_MIMETYPE_C_FROM_)
               ) != a_MIMETYPE_C_FROM_ || no_mboxo)
            menc = MIMEE_QP;
         else{
            ASSERT(menc != MIMEE_7B);
            menc = (mtc & a_MIMETYPE_C_HIGHBIT) ? MIMEE_8B : MIMEE_7B;
         }
      }
      *do_iconv = ((mtc & a_MIMETYPE_C_HIGHBIT) != 0);
   }else if(mtc & a_MIMETYPE_C_HIGHBIT){
      /* Otherwise clean 8-bit data: keep the target encoding, and convert the
       * character set only for text (or not yet typed) content */
      if(mtc & (a_MIMETYPE_C_NCTT | a_MIMETYPE_C_ISTXT))
         *do_iconv = TRU1;
   }else
      /* Clean 7-bit data; this label is also the target for empty files */
j7bit:
      menc = MIMEE_7B;
   /* Content without a type that survived until here is plain text */
   if(mtc & a_MIMETYPE_C_NCTT)
      *content_type = "text/plain";

   /* Not an attachment with specified charset? */
jcharset:
   if(*charset == NIL) /* TODO MIME/send: iter active? iter! else */
      *charset = (mtc & a_MIMETYPE_C_HIGHBIT) ? charset_iter_or_fallback()
            : ok_vlook(charset_7bit);
jleave:
   /* Map the MIME encoding to the conversion to return */
   /* TODO mime_type_file_classify() should not return conversion */
   if(rfc822){
      /* Data that starts with a From_ line is treated as a MBOX instead, and
       * then converted like any other non-message/rfc822 content */
      if(mtc & a_MIMETYPE_C_FROM_1STLINE){
         n_err(_("Pre-v15 %s cannot handle message/rfc822 that "
              "indeed is a RFC 4155 MBOX!\n"
            "  Forcing a content-type of application/mbox!\n"),
            n_uagent);
         *content_type = "application/mbox";
         goto jnorfc822;
      }
      c = (menc == MIMEE_7B ? CONV_7BIT
            : (menc == MIMEE_8B ? CONV_8BIT
            /* May have only 7-bit, 8-bit and binary.  Try to avoid latter */
            : ((mtc & a_MIMETYPE_C_HASNUL) ? CONV_NONE
            : ((mtc & a_MIMETYPE_C_HIGHBIT) ? CONV_8BIT : CONV_7BIT))));
   }else
jnorfc822:
      c = (menc == MIMEE_7B ? CONV_7BIT
            : (menc == MIMEE_8B ? CONV_8BIT
            : (menc == MIMEE_QP ? CONV_TOQP : CONV_TOB64)));
   NYD_OU;
   return c;
}
1371 
1372 enum mx_mimetype
mx_mimetype_classify_part(struct mimepart * mpp,boole for_user_context)1373 mx_mimetype_classify_part(struct mimepart *mpp, boole for_user_context){
1374    /* TODO n_mimetype_classify_part() <-> m_mime_classifier_ with life cycle */
1375    struct a_mimetype_lookup mtl;
1376    boole is_os;
1377    union {char const *cp; u32 f;} mce;
1378    char const *ct;
1379    enum mx_mimetype mc;
1380    NYD_IN;
1381 
1382    mc = mx_MIMETYPE_UNKNOWN;
1383    if((ct = mpp->m_ct_type_plain) == NIL) /* TODO may not */
1384       ct = su_empty;
1385 
1386    if((mce.cp = ok_vlook(mime_counter_evidence)) != NIL && *mce.cp != '\0'){
1387       if((su_idec_u32_cp(&mce.f, mce.cp, 0, NIL
1388                ) & (su_IDEC_STATE_EMASK | su_IDEC_STATE_CONSUMED)
1389             ) != su_IDEC_STATE_CONSUMED){
1390          n_err(_("Invalid *mime-counter-evidence* value content\n"));
1391          is_os = FAL0;
1392       }else{
1393          mce.f |= a_MIMETYPE_CE_SET;
1394          is_os = !su_cs_cmp_case(ct, "application/octet-stream");
1395 
1396          if(mpp->m_filename != NIL &&
1397                (is_os || (mce.f & a_MIMETYPE_CE_ALL_OVWR))){
1398             if(a_mimetype_by_filename(&mtl, mpp->m_filename, TRU1) == NIL){
1399                if(is_os)
1400                   goto jos_content_check;
1401             }else if(is_os || su_cs_cmp_case(ct, mtl.mtl_result)){
1402                if(mce.f & a_MIMETYPE_CE_ALL_OVWR)
1403                   mpp->m_ct_type_plain = ct = mtl.mtl_result;
1404                if(mce.f & (a_MIMETYPE_CE_BIN_OVWR | a_MIMETYPE_CE_ALL_OVWR))
1405                   mpp->m_ct_type_usr_ovwr = ct = mtl.mtl_result;
1406             }
1407          }
1408       }
1409    }else
1410       is_os = FAL0;
1411 
1412    if(*ct == '\0' || su_cs_find_c(ct, '/') == NIL) /* Compat with non-MIME */
1413       mc = mx_MIMETYPE_TEXT;
1414    else if(su_cs_starts_with_case(ct, "text/")){
1415       ct += sizeof("text/") -1;
1416       if(!su_cs_cmp_case(ct, "plain"))
1417          mc = mx_MIMETYPE_TEXT_PLAIN;
1418       else if(!su_cs_cmp_case(ct, "html"))
1419          mc = mx_MIMETYPE_TEXT_HTML;
1420       else
1421          mc = mx_MIMETYPE_TEXT;
1422    }else if(su_cs_starts_with_case(ct, "message/")){
1423       ct += sizeof("message/") -1;
1424       if(!su_cs_cmp_case(ct, "rfc822"))
1425          mc = mx_MIMETYPE_822;
1426       else
1427          mc = mx_MIMETYPE_MESSAGE;
1428    }else if(su_cs_starts_with_case(ct, "multipart/")){
1429       struct multi_types{
1430          char mt_name[12];
1431          enum mx_mimetype mt_mc;
1432       } const mta[] = {
1433          {"alternative\0", mx_MIMETYPE_ALTERNATIVE},
1434          {"related", mx_MIMETYPE_RELATED},
1435          {"digest", mx_MIMETYPE_DIGEST},
1436          {"signed", mx_MIMETYPE_SIGNED},
1437          {"encrypted", mx_MIMETYPE_ENCRYPTED}
1438       }, *mtap;
1439 
1440       for(ct += sizeof("multipart/") -1, mtap = mta;;)
1441          if(!su_cs_cmp_case(ct, mtap->mt_name)){
1442             mc = mtap->mt_mc;
1443             break;
1444          }else if(++mtap == &mta[NELEM(mta)]){
1445             mc = mx_MIMETYPE_MULTI;
1446             break;
1447          }
1448    }else if(su_cs_starts_with_case(ct, "application/")){
1449       if(is_os)
1450          goto jos_content_check;
1451       ct += sizeof("application/") -1;
1452       if(!su_cs_cmp_case(ct, "pkcs7-mime") ||
1453             !su_cs_cmp_case(ct, "x-pkcs7-mime"))
1454          mc = mx_MIMETYPE_PKCS7;
1455    }
1456 
1457 jleave:
1458    NYD_OU;
1459    return mc;
1460 
1461 jos_content_check:
1462    if((mce.f & a_MIMETYPE_CE_BIN_PARSE) && mpp->m_mime_enc != MIMEE_BIN &&
1463          mpp->m_charset != NIL)
1464       mc = a_mimetype_classify_o_s_part(mce.f, mpp, for_user_context);
1465    goto jleave;
1466 }
1467 
1468 enum mx_mimetype_handler_flags
mx_mimetype_handler(struct mx_mimetype_handler * mthp,struct mimepart const * mpp,enum sendaction action)1469 mx_mimetype_handler(struct mx_mimetype_handler *mthp,
1470       struct mimepart const *mpp, enum sendaction action){
1471 #define a__S    "pipe-"
1472 #define a__L    (sizeof(a__S) -1)
1473 
1474    struct a_mimetype_lookup mtl;
1475    char const *es, *cs, *ccp;
1476    uz el, cl, l;
1477    char *buf, *cp;
1478    BITENUM_IS(u32,mx_mimetype_hander_flags) rv, xrv;
1479    NYD_IN;
1480 
1481    su_mem_set(mthp, 0, sizeof *mthp);
1482    buf = NIL;
1483    xrv = rv = mx_MIMETYPE_HDL_NIL;
1484 
1485    if(action != SEND_QUOTE && action != SEND_QUOTE_ALL &&
1486          action != SEND_TODISP && action != SEND_TODISP_ALL &&
1487          action != SEND_TODISP_PARTS)
1488       goto jleave;
1489 
1490    el = ((es = mpp->m_filename) != NIL &&
1491          (es = su_cs_rfind_c(es, '.')) != NIL &&
1492          *++es != '\0') ? su_cs_len(es) : 0;
1493    cl = ((cs = mpp->m_ct_type_usr_ovwr) != NIL ||
1494          (cs = mpp->m_ct_type_plain) != NIL) ? su_cs_len(cs) : 0;
1495    if((l = MAX(el, cl)) == 0)
1496       /* TODO this should be done during parse time! */
1497       goto jleave;
1498 
1499    /* We do not pass the flags around, so ensure carrier is up-to-date */
1500    mthp->mth_flags = rv;
1501 
1502    buf = n_lofi_alloc(a__L + l +1);
1503    su_mem_copy(buf, a__S, a__L);
1504 
1505    /* I. *pipe-EXTENSION* handlers take precedence.
1506     * Yes, we really "fail" here for file extensions which clash MIME types */
1507    if(el > 0){
1508       su_mem_copy(buf + a__L, es, el +1);
1509       for(cp = &buf[a__L]; *cp != '\0'; ++cp)
1510          *cp = su_cs_to_lower(*cp);
1511 
1512       if((mthp->mth_shell_cmd = ccp = n_var_vlook(buf, FAL0)) != NIL){
1513          rv = a_mimetype_pipe_check(mthp, action);
1514          if((rv & mx_MIMETYPE_HDL_TYPE_MASK) != mx_MIMETYPE_HDL_NIL)
1515             goto jleave;
1516       }
1517    }
1518 
1519    /* Only MIME Content-Type: to follow, if any */
1520    if(cl == 0)
1521       goto jleave;
1522 
1523    su_mem_copy(cp = &buf[a__L], cs, cl +1);
1524    cs = cp; /* Ensure normalized variant is henceforth used */
1525    for(; *cp != '\0'; ++cp)
1526       *cp = su_cs_to_lower(*cp);
1527 
1528    /* II.: *pipe-TYPE/SUBTYPE* */
1529    if((mthp->mth_shell_cmd = n_var_vlook(buf, FAL0)) != NIL){
1530       rv = a_mimetype_pipe_check(mthp, action);
1531       if((rv & mx_MIMETYPE_HDL_TYPE_MASK) != mx_MIMETYPE_HDL_NIL)
1532          goto jleave;
1533    }
1534 
1535    /* III. RFC 1524 / Mailcap lookup */
1536 #ifdef mx_HAVE_MAILCAP
1537    switch(mx_mailcap_handler(mthp, cs, action, mpp)){
1538    case TRU1:
1539       rv = mthp->mth_flags;
1540       goto jleave;
1541    case TRUM1:
1542       xrv = mthp->mth_flags; /* "Use at last-resort" handler */
1543       break;
1544    default:
1545       break;
1546    }
1547 #endif
1548 
1549    /* IV. and final: `mimetype' type-marker extension induced handler */
1550    if(a_mimetype_by_name(&mtl, cs) != NIL){
1551       switch(mtl.mtl_node->mtn_flags & a_MIMETYPE__TM_MARKMASK){
1552 #ifndef mx_HAVE_FILTER_HTML_TAGSOUP
1553       case a_MIMETYPE_TM_SOUP_H:
1554          break;
1555 #endif
1556       case a_MIMETYPE_TM_SOUP_h:
1557 #ifdef mx_HAVE_FILTER_HTML_TAGSOUP
1558       case a_MIMETYPE_TM_SOUP_H:
1559          mthp->mth_ptf = &mx_flthtml_process_main;
1560          mthp->mth_msg.l = su_cs_len(mthp->mth_msg.s =
1561                UNCONST(char*,_("Built-in HTML tagsoup filter")));
1562          rv ^= mx_MIMETYPE_HDL_NIL | mx_MIMETYPE_HDL_PTF;
1563          goto jleave;
1564 #endif
1565          /* FALLTHRU */
1566       case a_MIMETYPE_TM_PLAIN:
1567          mthp->mth_msg.l = su_cs_len(mthp->mth_msg.s =
1568                UNCONST(char*,_("Plain text")));
1569          rv ^= mx_MIMETYPE_HDL_NIL | mx_MIMETYPE_HDL_TEXT;
1570          goto jleave;
1571       case a_MIMETYPE_TM_QUIET:
1572          mthp->mth_msg.l = 0;
1573          mthp->mth_msg.s = UNCONST(char*,su_empty);
1574          goto jleave;
1575       default:
1576          break;
1577       }
1578    }
1579 
1580    /* Last-resort, anyone? */
1581    if(xrv != mx_MIMETYPE_HDL_NIL)
1582       rv = xrv;
1583 
1584 jleave:
1585    if(buf != NIL)
1586       n_lofi_free(buf);
1587 
1588    xrv = rv;
1589    if((rv &= mx_MIMETYPE_HDL_TYPE_MASK) == mx_MIMETYPE_HDL_NIL){
1590       if(mthp->mth_msg.s == NIL)
1591          mthp->mth_msg.l = su_cs_len(mthp->mth_msg.s = UNCONST(char*,
1592                A_("[-- No MIME handler installed, or not applicable --]\n")));
1593    }else if(rv == mx_MIMETYPE_HDL_CMD &&
1594          !(xrv & mx_MIMETYPE_HDL_COPIOUSOUTPUT) &&
1595          action != SEND_TODISP_PARTS){
1596       mthp->mth_msg.l = su_cs_len(mthp->mth_msg.s = UNCONST(char*,
1597             _("[-- Use the command `mimeview' to display this --]\n")));
1598       xrv &= ~mx_MIMETYPE_HDL_TYPE_MASK;
1599       xrv |= (rv = mx_MIMETYPE_HDL_MSG);
1600    }
1601    mthp->mth_flags = xrv;
1602 
1603    NYD_OU;
1604    return S(enum mx_mimetype_handler_flags,rv);
1605 
1606 #undef a__L
1607 #undef a__S
1608 }
1609 
1610 #include "su/code-ou.h"
1611 /* s-it-mode */
1612