1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2  *@ MIME parameter handling.
3  *
4  * Copyright (c) 2016 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
5  * SPDX-License-Identifier: ISC
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #undef su_FILE
20 #define su_FILE mime_param
21 #define mx_SOURCE
22 
23 #ifndef mx_HAVE_AMALGAMATION
24 # include "mx/nail.h"
25 #endif
26 
27 #include <su/cs.h>
28 #include <su/icodec.h>
29 #include <su/mem.h>
30 
31 #include "mx/random.h"
32 
33 #include "mx/iconv.h"
34 
35 /* TODO fake */
36 #include "su/code-in.h"
37 
38 struct rfc2231_joiner {
39    struct rfc2231_joiner *rj_next;
40    u32      rj_no;            /* Continuation number */
41    u32      rj_len;           /* of useful data in .rj_dat */
42    u32      rj_val_off;       /* Start of value data therein */
43    u32      rj_cs_len;        /* Length of charset part */
44    boole      rj_is_enc;        /* Is percent encoded */
45    u8       __pad[7];
46    char const  *rj_dat;
47 };
48 
49 struct mime_param_builder {
50    struct mime_param_builder *mpb_next;
51    struct str  *mpb_result;
52    u32      mpb_level;        /* of recursion (<-> continuation number) */
53    u32      mpb_name_len;     /* of the parameter .mpb_name */
54    u32      mpb_value_len;    /* of remaining value */
55    u32      mpb_charset_len;  /* of .mpb_charset (iff in outermost level) */
56    u32      mpb_buf_len;      /* Usable result of this level in .mpb_buf */
57    boole      mpb_is_enc;       /* Level requires encoding */
58    u8       __dummy[1];
59    boole      mpb_is_utf8;      /* Encoding is UTF-8 */
60    s8       mpb_rv;
61    char const  *mpb_name;
62    char const  *mpb_value;       /* Remains of, once the level was entered */
63    char const  *mpb_charset;     /* *ttycharset* */
64    char        *mpb_buf;         /* Pointer to on-stack buffer */
65 };
66 
67 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
68 static boole const        _rfc2231_etab[] = {
69     1, 1, 1, 1,  1, 1, 1, 1,  1, 1,-1,-1,  1,-1, 1, 1,   /* NUL..SI */
70     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,   /* DLE..US */
71     1, 0, 1, 0,  0, 1, 0, 1,  1, 1, 1, 0,  1, 0, 0, 1,   /* CAN.. / */
72     0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 1, 1,  1, 1, 1, 1,   /*   0.. ? */
73 
74     1, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,   /*   @.. O */
75     0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 1,  1, 1, 0, 0,   /*   P.. _ */
76     0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,   /*   `.. o */
77     0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 1,   /*   p..DEL */
78 };
79 
80 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
81  * entire construct, return pointer to the first byte thereafter or to NUL */
82 static char const * _mime_param_skip(char const *hbp);
83 
84 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
85  * On successful return (1,-1; -1 is returned if the value was quoted via
86  * double quotation marks) a set end_or_null points to after the value and any
87  * possible separator and result->s is the autorec_alloc()d normalized value */
88 static s8      _mime_param_value_trim(struct str *result, char const *start,
89                      char const **end_or_null);
90 
91 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
92  * extended syntax: perform full RFC 2231 parsing starting at this point.
93  * Note that _join() returns is-error */
94 static char *     _rfc2231_param_parse(char const *param, uz plen,
95                      char const *hbp);
96 static boole     __rfc2231_join(struct rfc2231_joiner *head, char **result,
97                      char const **emsg);
98 
99 /* Recursive parameter builder.  Note we have a magic limit of 999 levels.
100  * Prepares a portion of output in self->mpb_buf;
101  * once >mpb_value is worked completely the deepmost level joins the result
102  * into >mpb_result and unrolls the stack. */
103 static void       _mime_param_create(struct mime_param_builder *self);
104 static void       __mime_param_join(struct mime_param_builder *head);
105 
106 static char const *
_mime_param_skip(char const * hbp)107 _mime_param_skip(char const *hbp)
108 {
109    char co, cn;
110    NYD2_IN;
111 
112    /* Skip over parameter name - note we may have skipped over an entire
113     * parameter name and thus point to a "="; i haven't yet truly checked
114     * against MIME RFCs, just test for ";" in the meanwhile XXX */
115    while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
116       ++hbp;
117    if (cn == '\0')
118       goto jleave;
119    ++hbp;
120    if (cn == ';')
121       goto jleave;
122 
123    while (su_cs_is_white((cn = *hbp))) /* XXX */
124       ++hbp;
125    if (cn == '\0')
126       goto jleave;
127 
128    if (cn == '"') {
129       co = '\0';
130       while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
131          co = (co == '\\') ? '\0' : cn;
132       if (cn != '\0' && (cn = *++hbp) == ';')
133          ++hbp;
134    } else {
135       for (;; cn = *++hbp)
136          if (cn == '\0' || cn == ';' || su_cs_is_white(cn))
137             break;
138       if (cn != '\0')
139          ++hbp;
140    }
141 jleave:
142    NYD2_OU;
143    return hbp;
144 }
145 
146 static s8
_mime_param_value_trim(struct str * result,char const * start,char const ** end_or_null)147 _mime_param_value_trim(struct str *result, char const *start,
148    char const **end_or_null)
149 {
150    char const *e;
151    char co, cn;
152    uz i;
153    s8 rv;
154    NYD2_IN;
155 
156    while (su_cs_is_white(*start)) /* XXX? */
157       ++start;
158 
159    if (*start == '"') {
160       for (co = '\0', e = ++start;; ++e)
161          if ((cn = *e) == '\0')
162             goto jerr;
163          else if (cn == '"' && co != '\\')
164             break;
165          else if (cn == '\\' && co == '\\')
166             co = '\0';
167          else
168             co = cn;
169       i = P2UZ(e++ - start);
170       rv = -TRU1;
171    } else {
172       for (e = start; (cn = *e) != '\0' && !su_cs_is_white(cn) && cn != ';';
173             ++e)
174          ;
175       i = P2UZ(e - start);
176       rv = TRU1;
177    }
178 
179    result->s = n_autorec_alloc(i +1);
180    if (rv > 0) {
181       su_mem_copy(result->s, start, result->l = i);
182       result->s[i] = '\0';
183    } else {
184       uz j;
185       char *cp;
186 
187       for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
188          cn = *start++;
189          if (cn != '\\' || co == '\\') {
190             cp[j++] = cn;
191             if (cn == '\\')
192                cn = '\0';
193          }
194       }
195       cp[j] = '\0';
196 
197       result->s = cp;
198       result->l = j;
199    }
200 
201    if (end_or_null != NULL) {
202       while (*e != '\0' && *e == ';')
203          ++e;
204       *end_or_null = e;
205    }
206 jleave:
207    NYD2_OU;
208    return rv;
209 jerr:
210    rv = FAL0;
211    goto jleave;
212 }
213 
214 static char *
_rfc2231_param_parse(char const * param,uz plen,char const * hbp)215 _rfc2231_param_parse(char const *param, uz plen, char const *hbp)
216 {
217    /* TODO Do it for real and unite with mime_param_get() */
218    struct str xval;
219    char nobuf[32], *eptr, *rv = NULL, c;
220    char const *hbp_base, *cp, *emsg = NULL;
221    struct rfc2231_joiner *head = NULL, *np;
222    boole errors = FAL0;
223    uz i;
224    NYD2_IN;
225 
226    /* We were called by mime_param_get() after a param name match that
227     * involved "*", so jump to the matching code */
228    hbp_base = hbp;
229    goto jumpin;
230 
231    for (; *hbp != '\0'; hbp_base = hbp) {
232       while (su_cs_is_white(*hbp))
233          ++hbp;
234 
235       if (!su_cs_cmp_case_n(hbp, param, plen)) {
236          hbp += plen;
237          while (su_cs_is_white(*hbp))
238             ++hbp;
239          if (*hbp++ != '*')
240                goto jerr;
241 
242          /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
243           * parameter continuation and the lone asterisk "*" percent encoded
244           * values -- if encoding is used the "*0" or lone parameter value
245           * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
246           * where both of CHARSET and LANGUAGE are optional (we do effectively
247           * generate error if CHARSET is missing though).
248           * Continuations may not use that "C'L'" construct, but be tolerant
249           * and ignore those.  Also encoded and non-encoded continuations may
250           * occur, i.e., perform percent en-/decoding only as necessary.
251           * Continuations may occur in any order */
252          /* xxx RFC 2231 parsing ignores language tags */
253 jumpin:
254          for (cp = hbp; su_cs_is_digit(*cp); ++cp)
255             ;
256          i = P2UZ(cp - hbp);
257          if (i != 0) {
258             if (i >= sizeof(nobuf)) {
259                emsg = N_("too many digits to form a valid number");
260                goto jerr;
261             } else if ((c = *cp) != '=' && c != '*') {
262                emsg = N_("expected = or * after leading digits");
263                goto jerr;
264             }
265             su_mem_copy(nobuf, hbp, i);
266             nobuf[i] = '\0';
267             if((su_idec_uz_cp(&i, nobuf, 10, NULL
268                      ) & (su_IDEC_STATE_EMASK | su_IDEC_STATE_CONSUMED)
269                   ) != su_IDEC_STATE_CONSUMED || i >= 999){
270                emsg = N_("invalid continuation sequence number");
271                goto jerr;
272             }
273             hbp = ++cp;
274 
275             /* Value encoded? */
276             if (c == '*') {
277                if (*hbp++ != '=')
278                   goto jeeqaaster;
279             } else if (c != '=') {
280 jeeqaaster:
281                emsg = N_("expected = after asterisk *");
282                goto jerr;
283             }
284          } else {
285             /* In continuation mode that is an error, however */
286             if (head != NULL) {
287                emsg = N_("missing continuation sequence number");
288                goto jerr;
289             }
290             /* Parameter value is encoded, may define encoding */
291             c = '*';
292             if (*cp != '=')
293                goto jeeqaaster;
294             hbp = ++cp;
295             i = 0;
296          }
297 
298          /* Create new node and insert it sorted; should be faster than
299           * creating an unsorted list and sorting it after parsing */
300          np = n_alloc(sizeof *np);
301          np->rj_next = NULL;
302          np->rj_no = (u32)i;
303          np->rj_is_enc = (c == '*');
304          np->rj_val_off = np->rj_cs_len = 0;
305 
306          if (head == NULL)
307             head = np;
308          else if (i < head->rj_no) {
309             np->rj_next = head;
310             head = np;
311          } else {
312             struct rfc2231_joiner *l = NULL, *x = head;
313 
314             while (x != NULL && i > x->rj_no)
315                l = x, x = x->rj_next;
316             if (x != NULL)
317                np->rj_next = x;
318             ASSERT(l != NULL);
319             l->rj_next = np;
320          }
321 
322          switch (_mime_param_value_trim(&xval, hbp, &cp)) {
323          default:
324             emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
325                   : N_("faulty value - missing closing quotation mark \"?");
326             goto jerr;
327          case -1:
328             /* XXX if (np->is_enc && su_mem_find(np->dat, '\'', i) != NULL) {
329              * XXX    emsg = N_("character set info not allowed here");
330              * XXX    goto jerr;
331              * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
332             /* FALLTHRU */
333          case 1:
334             if (xval.l >= U32_MAX) {
335                emsg = N_("parameter value too long");
336                goto jerr;
337             }
338             np->rj_len = (u32)xval.l;
339             np->rj_dat = xval.s;
340             break;
341          }
342 
343          /* Watch out for character set and language info */
344          if (np->rj_is_enc &&
345                (eptr = su_mem_find(xval.s, '\'', xval.l)) != NULL) {
346             np->rj_cs_len = P2UZ(eptr - xval.s);
347             if ((eptr = su_mem_find(eptr + 1, '\'', xval.l - np->rj_cs_len - 1)
348                   ) == NULL) {
349                emsg = N_("faulty RFC 2231 parameter extension");
350                goto jerr;
351             }
352             np->rj_val_off = P2UZ(++eptr - xval.s);
353          }
354 
355          hbp = cp;
356       } else
357          hbp = _mime_param_skip(hbp);
358    }
359    ASSERT(head != NULL); /* (always true due to jumpin:, but..) */
360 
361    errors |= __rfc2231_join(head, &rv, &emsg);
362    if (errors /*&& (n_poption & n_PO_D_V)*/) {
363       /* TODO should set global flags so that at the end of an operation
364        * TODO (for a message) a summary can be printed: faulty MIME, xy */
365       if (emsg == NULL)
366          emsg = N_("multiple causes");
367       n_err(_("Message had MIME errors: %s\n"), V_(emsg));
368    }
369 jleave:
370    NYD2_OU;
371    return rv;
372 
373 jerr:
374    while ((np = head) != NULL) {
375       head = np->rj_next;
376       n_free(np);
377    }
378    /*if (n_poption & n_PO_D_V)*/ {
379       if (emsg == NULL)
380          emsg = N_("expected asterisk *");
381       n_err(_("Faulty RFC 2231 MIME parameter value: %s: %s\n"
382          "Near: %s\n"), param, V_(emsg), hbp_base);
383    }
384    rv = NULL;
385    goto jleave;
386 }
387 
388 static boole
__rfc2231_join(struct rfc2231_joiner * head,char ** result,char const ** emsg)389 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
390 {
391    struct str sin, sou;
392    struct rfc2231_joiner *np;
393    char const *cp;
394    uz i;
395    enum {
396       _NONE       = 0,
397       _HAVE_ENC   = 1<<0,
398       _HAVE_ICONV = 1<<1,
399       _SEEN_ANY   = 1<<2,
400       _ERRORS     = 1<<3
401    } f = _NONE;
402    u32 no;
403 #ifdef mx_HAVE_ICONV
404    iconv_t fhicd;
405 #endif
406    NYD2_IN;
407 
408 #ifdef mx_HAVE_ICONV
409    UNINIT(fhicd, (iconv_t)-1);
410 
411    if (head->rj_is_enc) {
412       char const *tcs;
413 
414       f |= _HAVE_ENC;
415       if (head->rj_cs_len == 0) {
416          /* It is an error if the character set is not set, the language alone
417           * cannot convert characters, let aside that we don't use it at all */
418          *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
419          f |= _ERRORS;
420       } else if (su_cs_cmp_case_n(tcs = ok_vlook(ttycharset),
421             head->rj_dat, head->rj_cs_len)) {
422          char *cs = n_lofi_alloc(head->rj_cs_len +1);
423 
424          su_mem_copy(cs, head->rj_dat, head->rj_cs_len);
425          cs[head->rj_cs_len] = '\0';
426          if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
427             f |= _HAVE_ICONV;
428          else {
429             *emsg = N_("necessary character set conversion missing");
430             f |= _ERRORS;
431          }
432          n_lofi_free(cs);
433       }
434    }
435 #endif
436 
437    if (head->rj_no != 0) {
438       if (!(f & _ERRORS))
439          *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
440       f |= _ERRORS;
441    }
442 
443    for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; n_free(np)) {
444       head = np->rj_next;
445 
446       if (np->rj_no != no++) {
447          if (!(f & _ERRORS))
448             *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
449          f |= _ERRORS;
450       }
451 
452       /* RFC 2231 allows such info only in the first continuation, and
453        * furthermore MUSTs the first to be encoded, then */
454       if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
455             (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
456          if (!(f & _ERRORS))
457             *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
458          f |= _ERRORS;
459       }
460       f |= _SEEN_ANY;
461 
462       i = np->rj_len - np->rj_val_off;
463       if (!np->rj_is_enc)
464          n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
465       else {
466          /* Perform percent decoding */
467          sin.s = n_alloc(i +1);
468          sin.l = 0;
469 
470          for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
471             char c;
472 
473             if ((c = *cp++) == '%') {
474                s32 cc;
475 
476                if (i < 3 || (cc = n_c_from_hex_base16(cp)) < 0) {
477                   if (!(f & _ERRORS))
478                      *emsg = N_("invalid RFC 2231 percent encoded sequence");
479                   f |= _ERRORS;
480                   goto jhex_putc;
481                }
482                sin.s[sin.l++] = (char)cc;
483                cp += 2;
484                i -= 3;
485             } else {
486 jhex_putc:
487                sin.s[sin.l++] = c;
488                --i;
489             }
490          }
491          sin.s[sin.l] = '\0';
492 
493          n_str_add_buf(&sou, sin.s, sin.l);
494          n_free(sin.s);
495       }
496    }
497 
498    /* And add character set conversion on top as necessary.
499     * RFC 2231 is pragmatic: encode only mentions percent encoding and the
500     * character set for the entire string ("[no] facility for using more
501     * than one character set or language"), therefore "continuations may
502     * contain a mixture of encoded and unencoded segments" applies to
503     * a contiguous string of a single character set that has been torn in
504     * pieces due to space restrictions, and it happened that some pieces
505     * didn't need to be percent encoded.
506     *
507     * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
508     * that encoded-words-are-atomic, meaning that a single character-set
509     * conversion run over the final, joined, partially percent-decoded value
510     * should be sufficient */
511 #ifdef mx_HAVE_ICONV
512    if (f & _HAVE_ICONV) {
513       sin.s = NULL;
514       sin.l = 0;
515       if (n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &sin, &sou, NULL) != 0) {
516          if (!(f & _ERRORS)) /* XXX won't be reported with _UNIDFEFAULT */
517             *emsg = N_("character set conversion failed on value");
518          f |= _ERRORS;
519       }
520       n_free(sou.s);
521       sou = sin;
522 
523       n_iconv_close(fhicd);
524    }
525 #endif
526 
527    su_mem_copy(*result = n_autorec_alloc(sou.l +1), sou.s, sou.l +1);
528    n_free(sou.s);
529    NYD2_OU;
530    return ((f & _ERRORS) != 0);
531 }
532 
533 static void
_mime_param_create(struct mime_param_builder * self)534 _mime_param_create(struct mime_param_builder *self)
535 {
536    struct mime_param_builder next;
537    /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
538     * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
539     * On the other hand we may excess _LINELEN to avoid breaking up possible
540     * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
541     * support stateful encodings and (b) will try to synchronize on UTF-8 this
542     * problem is scarce, possibly even artificial */
543    char buf[MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
544       *bp, *bp_max, *bp_xmax, *bp_lanoenc;
545    char const *vb, *vb_lanoenc;
546    uz vl;
547    enum {
548       _NONE    = 0,
549       _ISENC   = 1<<0,
550       _HADRAW  = 1<<1,
551       _RAW     = 1<<2
552    } f = _NONE;
553    NYD2_IN;
554    LCTA(sizeof(buf) >= MIME_LINELEN * 2, "Buffer to small for operation");
555 
556 jneed_enc:
557    self->mpb_buf = bp = bp_lanoenc = buf;
558    self->mpb_buf_len = 0;
559    self->mpb_is_enc = ((f & _ISENC) != 0);
560    vb_lanoenc = vb = self->mpb_value;
561    vl = self->mpb_value_len;
562 
563    /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
564    bp_max = (buf + MIME_LINELEN) -
565          (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
566    bp_xmax = (buf + sizeof(buf)) -
567          (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
568    if ((f & _ISENC) && self->mpb_level == 0) {
569       bp_max -= self->mpb_charset_len;
570       bp_xmax -= self->mpb_charset_len;
571    }
572    if (PCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
573       su_DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
574       bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
575    }
576    ASSERT(PCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
577 
578    f &= _ISENC;
579    while (vl > 0) {
580       union {char c; u8 uc;} u; u.c = *vb;
581 
582       f |= _RAW;
583       if (!(f & _ISENC)) {
584          if (u.uc > 0x7F || su_cs_is_cntrl(u.c)) { /* XXX reject _is_cntrl? */
585             /* We need to percent encode this character, possibly changing
586              * overall strategy, but anyway the one of this level, possibly
587              * rendering invalid any output byte we yet have produced here.
588              * Instead of throwing away that work just recurse if some fancy
589              * magic condition is true */
590              /* *However*, many tested MUAs fail to deal with parameters that
591               * are split across "too many" fields, including ones that
592               * misread RFC 2231 to allow only one digit, i.e., a maximum of
593               * ten.  This is plain wrong, but that won't help their users */
594             if (P2UZ(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
595                goto jrecurse;
596             f |= _ISENC;
597             goto jneed_enc;
598          }
599 
600          if (u.uc == '"' || u.uc == '\\') {
601             f ^= _RAW;
602             bp[0] = '\\';
603             bp[1] = u.c;
604             bp += 2;
605          }
606       } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
607          f ^= _RAW;
608          bp[0] = '%';
609          n_c_to_hex_base16(bp + 1, u.c);
610          bp += 3;
611       }
612 
613       ++vb;
614       --vl;
615       if (f & _RAW) {
616          f |= _HADRAW;
617          vb_lanoenc = vb;
618          *bp++ = u.c;
619          bp_lanoenc = bp;
620       }
621 
622       /* If all available space has been consumed we must split.
623        * Due to compatibility reasons we must take care not to break up
624        * multibyte sequences -- even though RFC 2231 rather implies that the
625        * split value should be joined (after percent encoded fields have
626        * been percent decoded) and the resulting string be treated in the
627        * specified character set / language, MUAs have been seen which apply
628        * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
629        * if stateful encodings cannot truly be supported like that?!..
630        *
631        * So split at 7-bit character if we have seen any and the wastage isn't
632        * too large; recall that we need to keep the overall number of P=V
633        * values as low as possible due to compatibility reasons.
634        * If we haven't seen any plain bytes be laxe and realize that bp_max
635        * reflects SHOULD lines, and try to extend this as long as possible.
636        * However, with UTF-8, try to backward synchronize on sequence start */
637       if (bp <= bp_max)
638          continue;
639 
640       if ((f & _HADRAW) && (PCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
641             (!self->mpb_is_utf8 &&
642              P2UZ(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
643          bp = bp_lanoenc;
644          vl += P2UZ(vb - vb_lanoenc);
645          vb = vb_lanoenc;
646          goto jrecurse;
647       }
648 
649       if (self->mpb_is_utf8 && ((u8)(vb[-1]) & 0xC0) != 0x80) {
650          bp -= 3;
651          --vb;
652          ++vl;
653          goto jrecurse;
654       }
655 
656       if (bp <= bp_xmax)
657          continue;
658       /* (Shit.) */
659       goto jrecurse;
660    }
661 
662    /* That level made the great and completed encoding.  Build result */
663    self->mpb_is_enc = ((f & _ISENC) != 0);
664    self->mpb_buf_len = P2UZ(bp - buf);
665    __mime_param_join(self);
666 jleave:
667    NYD2_OU;
668    return;
669 
670    /* Need to recurse, take care not to excess magical limit of 999 levels */
671 jrecurse:
672    if (self->mpb_level == 999) {
673       /*if (n_poption & n_PO_D_V)*/
674          n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
675       goto jleave;
676    }
677 
678    self->mpb_is_enc = ((f & _ISENC) != 0);
679    self->mpb_buf_len = P2UZ(bp - buf);
680 
681    su_mem_set(&next, 0, sizeof next);
682    next.mpb_next = self;
683    next.mpb_level = self->mpb_level + 1;
684    next.mpb_name_len = self->mpb_name_len;
685    next.mpb_value_len = vl;
686    next.mpb_is_utf8 = self->mpb_is_utf8;
687    next.mpb_name = self->mpb_name;
688    next.mpb_value = vb;
689    _mime_param_create(&next);
690    goto jleave;
691 }
692 
693 static void
__mime_param_join(struct mime_param_builder * head)694 __mime_param_join(struct mime_param_builder *head)
695 {
696    char nobuf[16];
697    struct mime_param_builder *np;
698    uz i, ll;  ASSERT_INJ( uz len_max; )
699    struct str *result;
700    char *cp;
701    enum {
702       _NONE    = 0,
703       _ISENC   = 1<<0,
704       _ISQUOTE = 1<<1,
705       _ISCONT  = 1<<2
706    } f = _NONE;
707    NYD2_IN;
708 
709    /* Traverse the stack upwards to find out result length (worst case).
710     * Reverse the list while doing so */
711    for (i = 0, np = head, head = NULL; np != NULL;) {
712       struct mime_param_builder *tmp;
713 
714       i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
715       if (np->mpb_is_enc)
716          f |= _ISENC;
717 
718       tmp = np->mpb_next;
719       np->mpb_next = head;
720       head = np;
721       np = tmp;
722    }
723    if (f & _ISENC)
724       i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
725    ASSERT_INJ( len_max = i; )
726    head->mpb_rv = TRU1;
727 
728    result = head->mpb_result;
729    if (head->mpb_next != NULL)
730       f |= _ISCONT;
731    cp = result->s = n_autorec_alloc(i +1);
732 
733    for (ll = 0, np = head;;) {
734       /* Name part */
735       su_mem_copy(cp, np->mpb_name, i = np->mpb_name_len);
736       cp += i;
737       ll += i;
738 
739       if (f & _ISCONT) {
740          char *cpo = cp, *nop = nobuf + sizeof(nobuf);
741          u32 noi = np->mpb_level;
742 
743          *--nop = '\0';
744          do
745             *--nop = "0123456789"[noi % 10];
746          while ((noi /= 10) != 0);
747 
748          *cp++ = '*';
749          while (*nop != '\0')
750             *cp++ = *nop++;
751 
752          ll += P2UZ(cp - cpo);
753       }
754 
755       if ((f & _ISENC) || np->mpb_is_enc) {
756          *cp++ = '*';
757          ++ll;
758       }
759       *cp++ = '=';
760       ++ll;
761 
762       /* Value part */
763       if (f & _ISENC) {
764          f &= ~_ISENC;
765          su_mem_copy(cp, np->mpb_charset, i = np->mpb_charset_len);
766          cp += i;
767          cp[0] = '\'';
768          cp[1] = '\'';
769          cp += 2;
770          ll += i + 2;
771       } else if (!np->mpb_is_enc) {
772          f |= _ISQUOTE;
773          *cp++ = '"';
774          ++ll;
775       }
776 
777       su_mem_copy(cp, np->mpb_buf, i = np->mpb_buf_len);
778       cp += i;
779       ll += i;
780 
781       if (f & _ISQUOTE) {
782          f ^= _ISQUOTE;
783          *cp++ = '"';
784          ++ll;
785       }
786 
787       if ((np = np->mpb_next) == NULL)
788          break;
789       *cp++ = ';';
790       ++ll;
791 
792       i = ll;
793       i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
794       if (i >= MIME_LINELEN) {
795          head->mpb_rv = -TRU1;
796          *cp++ = '\n';
797          ll = 0;
798       }
799 
800       *cp++ = ' ';
801       ++ll;
802    }
803    *cp = '\0';
804    result->l = P2UZ(cp - result->s);
805    ASSERT(result->l < len_max);
806    NYD2_OU;
807 }
808 
809 FL char *
mime_param_get(char const * param,char const * headerbody)810 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
811 {
812    struct str xval;
813    char *rv = NULL;
814    uz plen;
815    char const *p;
816    NYD_IN;
817 
818    plen = su_cs_len(param);
819    p = headerbody;
820 
821    /* At the beginning of headerbody there is no parameter=value pair xxx */
822    if (!su_cs_is_white(*p))
823       goto jskip1st;
824 
825    for (;;) {
826       while (su_cs_is_white(*p))
827          ++p;
828 
829       if (!su_cs_cmp_case_n(p, param, plen)) {
830          p += plen;
831          while (su_cs_is_white(*p)) /* XXX? */
832             ++p;
833          switch (*p++) {
834          case '*':
835             rv = _rfc2231_param_parse(param, plen, p);
836             goto jleave;
837          case '=':
838             if (!_mime_param_value_trim(&xval, p, NULL)) {
839                /* XXX LOG? */
840                goto jleave;
841             }
842             rv = xval.s;
843 
844             /* We do have a result, but some (elder) software (S-nail <v14.8)
845              * will use RFC 2047 encoded words in  parameter values, too */
846             /* TODO Automatically check whether the value seems to be RFC 2047
847              * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
848             if ((p = su_cs_find(rv, "=?")) != NULL &&
849                   su_cs_find(p, "?=") != NULL) {
850                struct str ti, to;
851 
852                ti.l = su_cs_len(ti.s = rv);
853                mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
854                rv = savestrbuf(to.s, to.l);
855                n_free(to.s);
856             }
857             goto jleave;
858          default:
859             /* Not our desired parameter, skip and continue */
860             break;
861          }
862       }
863 
864 jskip1st:
865       if (*(p = _mime_param_skip(p)) == '\0')
866          goto jleave;
867    }
868 
869 jleave:
870    NYD_OU;
871    return rv;
872 }
873 
874 FL s8
mime_param_create(struct str * result,char const * name,char const * value)875 mime_param_create(struct str *result, char const *name, char const *value)
876 {
877    /* TODO All this needs rework when we have (1) a real string and even more
878     * TODO (2) use objects instead of stupid string concat; it's temporary
879     * TODO I.e., this function should return a HeaderBodyParam */
880    struct mime_param_builder top;
881    uz i;
882    NYD_IN;
883 
884    su_mem_set(result, 0, sizeof *result);
885 
886    su_mem_set(&top, 0, sizeof top);
887    top.mpb_result = result;
888    if ((i = su_cs_len(top.mpb_name = name)) >= U32_MAX)
889       goto jleave;
890    top.mpb_name_len = (u32)i;
891    if ((i = su_cs_len(top.mpb_value = value)) >= U32_MAX)
892       goto jleave;
893    top.mpb_value_len = (u32)i;
894    if ((i = su_cs_len(name = ok_vlook(ttycharset))) >= U32_MAX)
895       goto jleave;
896    top.mpb_charset_len = (u32)i;
897    top.mpb_charset = n_autorec_alloc(++i);
898    su_mem_copy(n_UNCONST(top.mpb_charset), name, i);
899    if(top.mpb_charset_len >= 4 && !su_mem_cmp(top.mpb_charset, "utf", 3) &&
900          ((top.mpb_charset[3] == '-' && top.mpb_charset[4] == '8' &&
901           top.mpb_charset_len == 5) || (top.mpb_charset[3] == '8' &&
902           top.mpb_charset_len == 4)))
903       top.mpb_is_utf8 = TRU1;
904    else
905       top.mpb_is_utf8 = FAL0;
906 
907    _mime_param_create(&top);
908 jleave:
909    NYD_OU;
910    return top.mpb_rv;
911 }
912 
913 FL char *
mime_param_boundary_get(char const * headerbody,uz * len)914 mime_param_boundary_get(char const *headerbody, uz *len)
915 {
916    char *q = NULL, *p;
917    NYD_IN;
918 
919    if ((p = mime_param_get("boundary", headerbody)) != NULL) {
920       uz i = su_cs_len(p);
921 
922       if (len != NULL)
923          *len = i + 2;
924       q = n_autorec_alloc(i + 2 +1);
925       q[0] = q[1] = '-';
926       su_mem_copy(q + 2, p, i);
927       *(q + i + 2) = '\0';
928    }
929    NYD_OU;
930    return q;
931 }
932 
933 FL char *
mime_param_boundary_create(void)934 mime_param_boundary_create(void)
935 {
936    static u32 reprocnt;
937    char *bp;
938    NYD_IN;
939 
940    bp = n_autorec_alloc(36 + 6 +1);
941    bp[0] = bp[2] = bp[39] = bp[41] = '=';
942    bp[1] = bp[40] = '-';
943    su_mem_copy(bp + 3, mx_random_create_cp(36, &reprocnt), 36);
944    bp[42] = '\0';
945    NYD_OU;
946    return bp;
947 }
948 
949 #include "su/code-ou.h"
950 /* s-it-mode */
951