1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ MIME parameter handling.
3 *
4 * Copyright (c) 2016 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
5 * SPDX-License-Identifier: ISC
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #undef su_FILE
20 #define su_FILE mime_param
21 #define mx_SOURCE
22
23 #ifndef mx_HAVE_AMALGAMATION
24 # include "mx/nail.h"
25 #endif
26
27 #include <su/cs.h>
28 #include <su/icodec.h>
29 #include <su/mem.h>
30
31 #include "mx/random.h"
32
33 #include "mx/iconv.h"
34
35 /* TODO fake */
36 #include "su/code-in.h"
37
/* One parsed section of an RFC 2231 parameter ("NAME[*DIGITS][*]=VALUE").
 * _rfc2231_param_parse() links these into a list that is kept sorted by
 * ascending .rj_no; __rfc2231_join() concatenates the list and frees it */
38 struct rfc2231_joiner {
39 struct rfc2231_joiner *rj_next; /* Next section in the list, or NULL */
40 u32 rj_no; /* Continuation number */
41 u32 rj_len; /* of useful data in .rj_dat */
42 u32 rj_val_off; /* Start of value data therein (after "CHARSET'LANG'") */
43 u32 rj_cs_len; /* Length of charset part (offset of the first "'") */
44 boole rj_is_enc; /* Is percent encoded */
45 u8 __pad[7]; /* Not referenced in this file (looks like padding) */
46 char const *rj_dat; /* Value as produced by _mime_param_value_trim() */
47 };
48
/* State of one recursion level of the parameter builder.
 * The outermost object is set up by mime_param_create(), each deeper one
 * lives on the stack of the _mime_param_create() call that recursed, and
 * links back to that level via .mpb_next */
49 struct mime_param_builder {
50 struct mime_param_builder *mpb_next; /* Reversed by __mime_param_join() */
51 struct str *mpb_result; /* Final output; set in the outermost level only */
52 u32 mpb_level; /* of recursion (<-> continuation number) */
53 u32 mpb_name_len; /* of the parameter .mpb_name */
54 u32 mpb_value_len; /* of remaining value */
55 u32 mpb_charset_len; /* of .mpb_charset (iff in outermost level) */
56 u32 mpb_buf_len; /* Usable result of this level in .mpb_buf */
57 boole mpb_is_enc; /* Level requires encoding */
58 u8 __dummy[1];
59 boole mpb_is_utf8; /* Encoding is UTF-8 */
/* Written by __mime_param_join() in the outermost level: TRU1, or -TRU1 if
 * the result was folded onto several lines; stays 0 if nothing was joined */
60 s8 mpb_rv;
61 char const *mpb_name;
62 char const *mpb_value; /* Remains of, once the level was entered */
63 char const *mpb_charset; /* *ttycharset* */
64 char *mpb_buf; /* Pointer to on-stack buffer */
65 };
66
67 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
/* Indexed by 7-bit byte value; _mime_param_create() percent encodes every
 * byte of an encoded level for which the slot is non-zero (only truthiness is
 * tested there, so the -1 slots currently act like 1) */
68 static boole const _rfc2231_etab[] = {
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* NUL..SI */
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* DLE..US */
71 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, /* SP.. / */
72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 0.. ? */
73
74 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @.. O */
75 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, /* P.. _ */
76 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* `.. o */
77 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p..DEL */
78 };
79
80 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
81 * entire construct, return pointer to the first byte thereafter or to NUL */
82 static char const * _mime_param_skip(char const *hbp);
83
84 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
85 * On successful return (1,-1; -1 is returned if the value was quoted via
86 * double quotation marks) a set end_or_null points to after the value and any
87 * possible separator and result->s is the autorec_alloc()d normalized value.
 * If a quoted value lacks its closing quotation mark 0 is returned, and
 * neither result nor end_or_null are written */
88 static s8 _mime_param_value_trim(struct str *result, char const *start,
89 char const **end_or_null);
90
91 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
92 * extended syntax: perform full RFC 2231 parsing starting at this point.
93 * Note that _join() returns is-error: it concatenates the sorted list of
 * sections into *result, frees the list nodes, and stores a message for the
 * first problem it detects in *emsg */
94 static char * _rfc2231_param_parse(char const *param, uz plen,
95 char const *hbp);
96 static boole __rfc2231_join(struct rfc2231_joiner *head, char **result,
97 char const **emsg);
98
99 /* Recursive parameter builder. Note we have a magic limit of 999 levels.
100 * Prepares a portion of output in self->mpb_buf;
101 * once ->mpb_value is worked completely the deepmost level joins the result
102 * into ->mpb_result (via __mime_param_join()) and unrolls the stack. */
103 static void _mime_param_create(struct mime_param_builder *self);
104 static void __mime_param_join(struct mime_param_builder *head);
105
106 static char const *
_mime_param_skip(char const * hbp)107 _mime_param_skip(char const *hbp)
108 {
109 char co, cn;
110 NYD2_IN;
111
112 /* Skip over parameter name - note we may have skipped over an entire
113 * parameter name and thus point to a "="; i haven't yet truly checked
114 * against MIME RFCs, just test for ";" in the meanwhile XXX */
115 while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
116 ++hbp;
117 if (cn == '\0')
118 goto jleave;
119 ++hbp;
120 if (cn == ';')
121 goto jleave;
122
123 while (su_cs_is_white((cn = *hbp))) /* XXX */
124 ++hbp;
125 if (cn == '\0')
126 goto jleave;
127
128 if (cn == '"') {
129 co = '\0';
130 while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
131 co = (co == '\\') ? '\0' : cn;
132 if (cn != '\0' && (cn = *++hbp) == ';')
133 ++hbp;
134 } else {
135 for (;; cn = *++hbp)
136 if (cn == '\0' || cn == ';' || su_cs_is_white(cn))
137 break;
138 if (cn != '\0')
139 ++hbp;
140 }
141 jleave:
142 NYD2_OU;
143 return hbp;
144 }
145
146 static s8
_mime_param_value_trim(struct str * result,char const * start,char const ** end_or_null)147 _mime_param_value_trim(struct str *result, char const *start,
148 char const **end_or_null)
149 {
150 char const *e;
151 char co, cn;
152 uz i;
153 s8 rv;
154 NYD2_IN;
155
156 while (su_cs_is_white(*start)) /* XXX? */
157 ++start;
158
159 if (*start == '"') {
160 for (co = '\0', e = ++start;; ++e)
161 if ((cn = *e) == '\0')
162 goto jerr;
163 else if (cn == '"' && co != '\\')
164 break;
165 else if (cn == '\\' && co == '\\')
166 co = '\0';
167 else
168 co = cn;
169 i = P2UZ(e++ - start);
170 rv = -TRU1;
171 } else {
172 for (e = start; (cn = *e) != '\0' && !su_cs_is_white(cn) && cn != ';';
173 ++e)
174 ;
175 i = P2UZ(e - start);
176 rv = TRU1;
177 }
178
179 result->s = n_autorec_alloc(i +1);
180 if (rv > 0) {
181 su_mem_copy(result->s, start, result->l = i);
182 result->s[i] = '\0';
183 } else {
184 uz j;
185 char *cp;
186
187 for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
188 cn = *start++;
189 if (cn != '\\' || co == '\\') {
190 cp[j++] = cn;
191 if (cn == '\\')
192 cn = '\0';
193 }
194 }
195 cp[j] = '\0';
196
197 result->s = cp;
198 result->l = j;
199 }
200
201 if (end_or_null != NULL) {
202 while (*e != '\0' && *e == ';')
203 ++e;
204 *end_or_null = e;
205 }
206 jleave:
207 NYD2_OU;
208 return rv;
209 jerr:
210 rv = FAL0;
211 goto jleave;
212 }
213
214 static char *
_rfc2231_param_parse(char const * param,uz plen,char const * hbp)215 _rfc2231_param_parse(char const *param, uz plen, char const *hbp)
216 {
217 /* TODO Do it for real and unite with mime_param_get() */
218 struct str xval;
219 char nobuf[32], *eptr, *rv = NULL, c;
220 char const *hbp_base, *cp, *emsg = NULL;
221 struct rfc2231_joiner *head = NULL, *np;
222 boole errors = FAL0;
223 uz i;
224 NYD2_IN;
225
226 /* We were called by mime_param_get() after a param name match that
227 * involved "*", so jump to the matching code */
228 hbp_base = hbp;
229 goto jumpin;
230
231 for (; *hbp != '\0'; hbp_base = hbp) {
232 while (su_cs_is_white(*hbp))
233 ++hbp;
234
235 if (!su_cs_cmp_case_n(hbp, param, plen)) {
236 hbp += plen;
237 while (su_cs_is_white(*hbp))
238 ++hbp;
239 if (*hbp++ != '*')
240 goto jerr;
241
242 /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
243 * parameter continuation and the lone asterisk "*" percent encoded
244 * values -- if encoding is used the "*0" or lone parameter value
245 * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
246 * where both of CHARSET and LANGUAGE are optional (we do effectively
247 * generate error if CHARSET is missing though).
248 * Continuations may not use that "C'L'" construct, but be tolerant
249 * and ignore those. Also encoded and non-encoded continuations may
250 * occur, i.e., perform percent en-/decoding only as necessary.
251 * Continuations may occur in any order */
252 /* xxx RFC 2231 parsing ignores language tags */
253 jumpin:
254 for (cp = hbp; su_cs_is_digit(*cp); ++cp)
255 ;
256 i = P2UZ(cp - hbp);
257 if (i != 0) {
258 if (i >= sizeof(nobuf)) {
259 emsg = N_("too many digits to form a valid number");
260 goto jerr;
261 } else if ((c = *cp) != '=' && c != '*') {
262 emsg = N_("expected = or * after leading digits");
263 goto jerr;
264 }
265 su_mem_copy(nobuf, hbp, i);
266 nobuf[i] = '\0';
267 if((su_idec_uz_cp(&i, nobuf, 10, NULL
268 ) & (su_IDEC_STATE_EMASK | su_IDEC_STATE_CONSUMED)
269 ) != su_IDEC_STATE_CONSUMED || i >= 999){
270 emsg = N_("invalid continuation sequence number");
271 goto jerr;
272 }
273 hbp = ++cp;
274
275 /* Value encoded? */
276 if (c == '*') {
277 if (*hbp++ != '=')
278 goto jeeqaaster;
279 } else if (c != '=') {
280 jeeqaaster:
281 emsg = N_("expected = after asterisk *");
282 goto jerr;
283 }
284 } else {
285 /* In continuation mode that is an error, however */
286 if (head != NULL) {
287 emsg = N_("missing continuation sequence number");
288 goto jerr;
289 }
290 /* Parameter value is encoded, may define encoding */
291 c = '*';
292 if (*cp != '=')
293 goto jeeqaaster;
294 hbp = ++cp;
295 i = 0;
296 }
297
298 /* Create new node and insert it sorted; should be faster than
299 * creating an unsorted list and sorting it after parsing */
300 np = n_alloc(sizeof *np);
301 np->rj_next = NULL;
302 np->rj_no = (u32)i;
303 np->rj_is_enc = (c == '*');
304 np->rj_val_off = np->rj_cs_len = 0;
305
306 if (head == NULL)
307 head = np;
308 else if (i < head->rj_no) {
309 np->rj_next = head;
310 head = np;
311 } else {
312 struct rfc2231_joiner *l = NULL, *x = head;
313
314 while (x != NULL && i > x->rj_no)
315 l = x, x = x->rj_next;
316 if (x != NULL)
317 np->rj_next = x;
318 ASSERT(l != NULL);
319 l->rj_next = np;
320 }
321
322 switch (_mime_param_value_trim(&xval, hbp, &cp)) {
323 default:
324 emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
325 : N_("faulty value - missing closing quotation mark \"?");
326 goto jerr;
327 case -1:
328 /* XXX if (np->is_enc && su_mem_find(np->dat, '\'', i) != NULL) {
329 * XXX emsg = N_("character set info not allowed here");
330 * XXX goto jerr;
331 * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
332 /* FALLTHRU */
333 case 1:
334 if (xval.l >= U32_MAX) {
335 emsg = N_("parameter value too long");
336 goto jerr;
337 }
338 np->rj_len = (u32)xval.l;
339 np->rj_dat = xval.s;
340 break;
341 }
342
343 /* Watch out for character set and language info */
344 if (np->rj_is_enc &&
345 (eptr = su_mem_find(xval.s, '\'', xval.l)) != NULL) {
346 np->rj_cs_len = P2UZ(eptr - xval.s);
347 if ((eptr = su_mem_find(eptr + 1, '\'', xval.l - np->rj_cs_len - 1)
348 ) == NULL) {
349 emsg = N_("faulty RFC 2231 parameter extension");
350 goto jerr;
351 }
352 np->rj_val_off = P2UZ(++eptr - xval.s);
353 }
354
355 hbp = cp;
356 } else
357 hbp = _mime_param_skip(hbp);
358 }
359 ASSERT(head != NULL); /* (always true due to jumpin:, but..) */
360
361 errors |= __rfc2231_join(head, &rv, &emsg);
362 if (errors /*&& (n_poption & n_PO_D_V)*/) {
363 /* TODO should set global flags so that at the end of an operation
364 * TODO (for a message) a summary can be printed: faulty MIME, xy */
365 if (emsg == NULL)
366 emsg = N_("multiple causes");
367 n_err(_("Message had MIME errors: %s\n"), V_(emsg));
368 }
369 jleave:
370 NYD2_OU;
371 return rv;
372
373 jerr:
374 while ((np = head) != NULL) {
375 head = np->rj_next;
376 n_free(np);
377 }
378 /*if (n_poption & n_PO_D_V)*/ {
379 if (emsg == NULL)
380 emsg = N_("expected asterisk *");
381 n_err(_("Faulty RFC 2231 MIME parameter value: %s: %s\n"
382 "Near: %s\n"), param, V_(emsg), hbp_base);
383 }
384 rv = NULL;
385 goto jleave;
386 }
387
388 static boole
__rfc2231_join(struct rfc2231_joiner * head,char ** result,char const ** emsg)389 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
390 {
391 struct str sin, sou;
392 struct rfc2231_joiner *np;
393 char const *cp;
394 uz i;
395 enum {
396 _NONE = 0,
397 _HAVE_ENC = 1<<0,
398 _HAVE_ICONV = 1<<1,
399 _SEEN_ANY = 1<<2,
400 _ERRORS = 1<<3
401 } f = _NONE;
402 u32 no;
403 #ifdef mx_HAVE_ICONV
404 iconv_t fhicd;
405 #endif
406 NYD2_IN;
407
408 #ifdef mx_HAVE_ICONV
409 UNINIT(fhicd, (iconv_t)-1);
410
411 if (head->rj_is_enc) {
412 char const *tcs;
413
414 f |= _HAVE_ENC;
415 if (head->rj_cs_len == 0) {
416 /* It is an error if the character set is not set, the language alone
417 * cannot convert characters, let aside that we don't use it at all */
418 *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
419 f |= _ERRORS;
420 } else if (su_cs_cmp_case_n(tcs = ok_vlook(ttycharset),
421 head->rj_dat, head->rj_cs_len)) {
422 char *cs = n_lofi_alloc(head->rj_cs_len +1);
423
424 su_mem_copy(cs, head->rj_dat, head->rj_cs_len);
425 cs[head->rj_cs_len] = '\0';
426 if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
427 f |= _HAVE_ICONV;
428 else {
429 *emsg = N_("necessary character set conversion missing");
430 f |= _ERRORS;
431 }
432 n_lofi_free(cs);
433 }
434 }
435 #endif
436
437 if (head->rj_no != 0) {
438 if (!(f & _ERRORS))
439 *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
440 f |= _ERRORS;
441 }
442
443 for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; n_free(np)) {
444 head = np->rj_next;
445
446 if (np->rj_no != no++) {
447 if (!(f & _ERRORS))
448 *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
449 f |= _ERRORS;
450 }
451
452 /* RFC 2231 allows such info only in the first continuation, and
453 * furthermore MUSTs the first to be encoded, then */
454 if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
455 (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
456 if (!(f & _ERRORS))
457 *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
458 f |= _ERRORS;
459 }
460 f |= _SEEN_ANY;
461
462 i = np->rj_len - np->rj_val_off;
463 if (!np->rj_is_enc)
464 n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
465 else {
466 /* Perform percent decoding */
467 sin.s = n_alloc(i +1);
468 sin.l = 0;
469
470 for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
471 char c;
472
473 if ((c = *cp++) == '%') {
474 s32 cc;
475
476 if (i < 3 || (cc = n_c_from_hex_base16(cp)) < 0) {
477 if (!(f & _ERRORS))
478 *emsg = N_("invalid RFC 2231 percent encoded sequence");
479 f |= _ERRORS;
480 goto jhex_putc;
481 }
482 sin.s[sin.l++] = (char)cc;
483 cp += 2;
484 i -= 3;
485 } else {
486 jhex_putc:
487 sin.s[sin.l++] = c;
488 --i;
489 }
490 }
491 sin.s[sin.l] = '\0';
492
493 n_str_add_buf(&sou, sin.s, sin.l);
494 n_free(sin.s);
495 }
496 }
497
498 /* And add character set conversion on top as necessary.
499 * RFC 2231 is pragmatic: encode only mentions percent encoding and the
500 * character set for the entire string ("[no] facility for using more
501 * than one character set or language"), therefore "continuations may
502 * contain a mixture of encoded and unencoded segments" applies to
503 * a contiguous string of a single character set that has been torn in
504 * pieces due to space restrictions, and it happened that some pieces
505 * didn't need to be percent encoded.
506 *
507 * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
508 * that encoded-words-are-atomic, meaning that a single character-set
509 * conversion run over the final, joined, partially percent-decoded value
510 * should be sufficient */
511 #ifdef mx_HAVE_ICONV
512 if (f & _HAVE_ICONV) {
513 sin.s = NULL;
514 sin.l = 0;
515 if (n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &sin, &sou, NULL) != 0) {
516 if (!(f & _ERRORS)) /* XXX won't be reported with _UNIDFEFAULT */
517 *emsg = N_("character set conversion failed on value");
518 f |= _ERRORS;
519 }
520 n_free(sou.s);
521 sou = sin;
522
523 n_iconv_close(fhicd);
524 }
525 #endif
526
527 su_mem_copy(*result = n_autorec_alloc(sou.l +1), sou.s, sou.l +1);
528 n_free(sou.s);
529 NYD2_OU;
530 return ((f & _ERRORS) != 0);
531 }
532
533 static void
_mime_param_create(struct mime_param_builder * self)534 _mime_param_create(struct mime_param_builder *self)
535 {
536 struct mime_param_builder next;
537 /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
538 * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
539 * On the other hand we may excess _LINELEN to avoid breaking up possible
540 * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
541 * support stateful encodings and (b) will try to synchronize on UTF-8 this
542 * problem is scarce, possibly even artificial */
543 char buf[MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
544 *bp, *bp_max, *bp_xmax, *bp_lanoenc;
545 char const *vb, *vb_lanoenc;
546 uz vl;
547 enum {
548 _NONE = 0,
549 _ISENC = 1<<0,
550 _HADRAW = 1<<1,
551 _RAW = 1<<2
552 } f = _NONE;
553 NYD2_IN;
554 LCTA(sizeof(buf) >= MIME_LINELEN * 2, "Buffer to small for operation");
555
556 jneed_enc:
557 self->mpb_buf = bp = bp_lanoenc = buf;
558 self->mpb_buf_len = 0;
559 self->mpb_is_enc = ((f & _ISENC) != 0);
560 vb_lanoenc = vb = self->mpb_value;
561 vl = self->mpb_value_len;
562
563 /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
564 bp_max = (buf + MIME_LINELEN) -
565 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
566 bp_xmax = (buf + sizeof(buf)) -
567 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
568 if ((f & _ISENC) && self->mpb_level == 0) {
569 bp_max -= self->mpb_charset_len;
570 bp_xmax -= self->mpb_charset_len;
571 }
572 if (PCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
573 su_DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
574 bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
575 }
576 ASSERT(PCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
577
578 f &= _ISENC;
579 while (vl > 0) {
580 union {char c; u8 uc;} u; u.c = *vb;
581
582 f |= _RAW;
583 if (!(f & _ISENC)) {
584 if (u.uc > 0x7F || su_cs_is_cntrl(u.c)) { /* XXX reject _is_cntrl? */
585 /* We need to percent encode this character, possibly changing
586 * overall strategy, but anyway the one of this level, possibly
587 * rendering invalid any output byte we yet have produced here.
588 * Instead of throwing away that work just recurse if some fancy
589 * magic condition is true */
590 /* *However*, many tested MUAs fail to deal with parameters that
591 * are split across "too many" fields, including ones that
592 * misread RFC 2231 to allow only one digit, i.e., a maximum of
593 * ten. This is plain wrong, but that won't help their users */
594 if (P2UZ(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
595 goto jrecurse;
596 f |= _ISENC;
597 goto jneed_enc;
598 }
599
600 if (u.uc == '"' || u.uc == '\\') {
601 f ^= _RAW;
602 bp[0] = '\\';
603 bp[1] = u.c;
604 bp += 2;
605 }
606 } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
607 f ^= _RAW;
608 bp[0] = '%';
609 n_c_to_hex_base16(bp + 1, u.c);
610 bp += 3;
611 }
612
613 ++vb;
614 --vl;
615 if (f & _RAW) {
616 f |= _HADRAW;
617 vb_lanoenc = vb;
618 *bp++ = u.c;
619 bp_lanoenc = bp;
620 }
621
622 /* If all available space has been consumed we must split.
623 * Due to compatibility reasons we must take care not to break up
624 * multibyte sequences -- even though RFC 2231 rather implies that the
625 * split value should be joined (after percent encoded fields have
626 * been percent decoded) and the resulting string be treated in the
627 * specified character set / language, MUAs have been seen which apply
628 * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
629 * if stateful encodings cannot truly be supported like that?!..
630 *
631 * So split at 7-bit character if we have seen any and the wastage isn't
632 * too large; recall that we need to keep the overall number of P=V
633 * values as low as possible due to compatibility reasons.
634 * If we haven't seen any plain bytes be laxe and realize that bp_max
635 * reflects SHOULD lines, and try to extend this as long as possible.
636 * However, with UTF-8, try to backward synchronize on sequence start */
637 if (bp <= bp_max)
638 continue;
639
640 if ((f & _HADRAW) && (PCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
641 (!self->mpb_is_utf8 &&
642 P2UZ(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
643 bp = bp_lanoenc;
644 vl += P2UZ(vb - vb_lanoenc);
645 vb = vb_lanoenc;
646 goto jrecurse;
647 }
648
649 if (self->mpb_is_utf8 && ((u8)(vb[-1]) & 0xC0) != 0x80) {
650 bp -= 3;
651 --vb;
652 ++vl;
653 goto jrecurse;
654 }
655
656 if (bp <= bp_xmax)
657 continue;
658 /* (Shit.) */
659 goto jrecurse;
660 }
661
662 /* That level made the great and completed encoding. Build result */
663 self->mpb_is_enc = ((f & _ISENC) != 0);
664 self->mpb_buf_len = P2UZ(bp - buf);
665 __mime_param_join(self);
666 jleave:
667 NYD2_OU;
668 return;
669
670 /* Need to recurse, take care not to excess magical limit of 999 levels */
671 jrecurse:
672 if (self->mpb_level == 999) {
673 /*if (n_poption & n_PO_D_V)*/
674 n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
675 goto jleave;
676 }
677
678 self->mpb_is_enc = ((f & _ISENC) != 0);
679 self->mpb_buf_len = P2UZ(bp - buf);
680
681 su_mem_set(&next, 0, sizeof next);
682 next.mpb_next = self;
683 next.mpb_level = self->mpb_level + 1;
684 next.mpb_name_len = self->mpb_name_len;
685 next.mpb_value_len = vl;
686 next.mpb_is_utf8 = self->mpb_is_utf8;
687 next.mpb_name = self->mpb_name;
688 next.mpb_value = vb;
689 _mime_param_create(&next);
690 goto jleave;
691 }
692
693 static void
__mime_param_join(struct mime_param_builder * head)694 __mime_param_join(struct mime_param_builder *head)
695 {
696 char nobuf[16];
697 struct mime_param_builder *np;
698 uz i, ll; ASSERT_INJ( uz len_max; )
699 struct str *result;
700 char *cp;
701 enum {
702 _NONE = 0,
703 _ISENC = 1<<0,
704 _ISQUOTE = 1<<1,
705 _ISCONT = 1<<2
706 } f = _NONE;
707 NYD2_IN;
708
709 /* Traverse the stack upwards to find out result length (worst case).
710 * Reverse the list while doing so */
711 for (i = 0, np = head, head = NULL; np != NULL;) {
712 struct mime_param_builder *tmp;
713
714 i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
715 if (np->mpb_is_enc)
716 f |= _ISENC;
717
718 tmp = np->mpb_next;
719 np->mpb_next = head;
720 head = np;
721 np = tmp;
722 }
723 if (f & _ISENC)
724 i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
725 ASSERT_INJ( len_max = i; )
726 head->mpb_rv = TRU1;
727
728 result = head->mpb_result;
729 if (head->mpb_next != NULL)
730 f |= _ISCONT;
731 cp = result->s = n_autorec_alloc(i +1);
732
733 for (ll = 0, np = head;;) {
734 /* Name part */
735 su_mem_copy(cp, np->mpb_name, i = np->mpb_name_len);
736 cp += i;
737 ll += i;
738
739 if (f & _ISCONT) {
740 char *cpo = cp, *nop = nobuf + sizeof(nobuf);
741 u32 noi = np->mpb_level;
742
743 *--nop = '\0';
744 do
745 *--nop = "0123456789"[noi % 10];
746 while ((noi /= 10) != 0);
747
748 *cp++ = '*';
749 while (*nop != '\0')
750 *cp++ = *nop++;
751
752 ll += P2UZ(cp - cpo);
753 }
754
755 if ((f & _ISENC) || np->mpb_is_enc) {
756 *cp++ = '*';
757 ++ll;
758 }
759 *cp++ = '=';
760 ++ll;
761
762 /* Value part */
763 if (f & _ISENC) {
764 f &= ~_ISENC;
765 su_mem_copy(cp, np->mpb_charset, i = np->mpb_charset_len);
766 cp += i;
767 cp[0] = '\'';
768 cp[1] = '\'';
769 cp += 2;
770 ll += i + 2;
771 } else if (!np->mpb_is_enc) {
772 f |= _ISQUOTE;
773 *cp++ = '"';
774 ++ll;
775 }
776
777 su_mem_copy(cp, np->mpb_buf, i = np->mpb_buf_len);
778 cp += i;
779 ll += i;
780
781 if (f & _ISQUOTE) {
782 f ^= _ISQUOTE;
783 *cp++ = '"';
784 ++ll;
785 }
786
787 if ((np = np->mpb_next) == NULL)
788 break;
789 *cp++ = ';';
790 ++ll;
791
792 i = ll;
793 i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
794 if (i >= MIME_LINELEN) {
795 head->mpb_rv = -TRU1;
796 *cp++ = '\n';
797 ll = 0;
798 }
799
800 *cp++ = ' ';
801 ++ll;
802 }
803 *cp = '\0';
804 result->l = P2UZ(cp - result->s);
805 ASSERT(result->l < len_max);
806 NYD2_OU;
807 }
808
809 FL char *
mime_param_get(char const * param,char const * headerbody)810 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
811 {
812 struct str xval;
813 char *rv = NULL;
814 uz plen;
815 char const *p;
816 NYD_IN;
817
818 plen = su_cs_len(param);
819 p = headerbody;
820
821 /* At the beginning of headerbody there is no parameter=value pair xxx */
822 if (!su_cs_is_white(*p))
823 goto jskip1st;
824
825 for (;;) {
826 while (su_cs_is_white(*p))
827 ++p;
828
829 if (!su_cs_cmp_case_n(p, param, plen)) {
830 p += plen;
831 while (su_cs_is_white(*p)) /* XXX? */
832 ++p;
833 switch (*p++) {
834 case '*':
835 rv = _rfc2231_param_parse(param, plen, p);
836 goto jleave;
837 case '=':
838 if (!_mime_param_value_trim(&xval, p, NULL)) {
839 /* XXX LOG? */
840 goto jleave;
841 }
842 rv = xval.s;
843
844 /* We do have a result, but some (elder) software (S-nail <v14.8)
845 * will use RFC 2047 encoded words in parameter values, too */
846 /* TODO Automatically check whether the value seems to be RFC 2047
847 * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
848 if ((p = su_cs_find(rv, "=?")) != NULL &&
849 su_cs_find(p, "?=") != NULL) {
850 struct str ti, to;
851
852 ti.l = su_cs_len(ti.s = rv);
853 mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
854 rv = savestrbuf(to.s, to.l);
855 n_free(to.s);
856 }
857 goto jleave;
858 default:
859 /* Not our desired parameter, skip and continue */
860 break;
861 }
862 }
863
864 jskip1st:
865 if (*(p = _mime_param_skip(p)) == '\0')
866 goto jleave;
867 }
868
869 jleave:
870 NYD_OU;
871 return rv;
872 }
873
874 FL s8
mime_param_create(struct str * result,char const * name,char const * value)875 mime_param_create(struct str *result, char const *name, char const *value)
876 {
877 /* TODO All this needs rework when we have (1) a real string and even more
878 * TODO (2) use objects instead of stupid string concat; it's temporary
879 * TODO I.e., this function should return a HeaderBodyParam */
880 struct mime_param_builder top;
881 uz i;
882 NYD_IN;
883
884 su_mem_set(result, 0, sizeof *result);
885
886 su_mem_set(&top, 0, sizeof top);
887 top.mpb_result = result;
888 if ((i = su_cs_len(top.mpb_name = name)) >= U32_MAX)
889 goto jleave;
890 top.mpb_name_len = (u32)i;
891 if ((i = su_cs_len(top.mpb_value = value)) >= U32_MAX)
892 goto jleave;
893 top.mpb_value_len = (u32)i;
894 if ((i = su_cs_len(name = ok_vlook(ttycharset))) >= U32_MAX)
895 goto jleave;
896 top.mpb_charset_len = (u32)i;
897 top.mpb_charset = n_autorec_alloc(++i);
898 su_mem_copy(n_UNCONST(top.mpb_charset), name, i);
899 if(top.mpb_charset_len >= 4 && !su_mem_cmp(top.mpb_charset, "utf", 3) &&
900 ((top.mpb_charset[3] == '-' && top.mpb_charset[4] == '8' &&
901 top.mpb_charset_len == 5) || (top.mpb_charset[3] == '8' &&
902 top.mpb_charset_len == 4)))
903 top.mpb_is_utf8 = TRU1;
904 else
905 top.mpb_is_utf8 = FAL0;
906
907 _mime_param_create(&top);
908 jleave:
909 NYD_OU;
910 return top.mpb_rv;
911 }
912
913 FL char *
mime_param_boundary_get(char const * headerbody,uz * len)914 mime_param_boundary_get(char const *headerbody, uz *len)
915 {
916 char *q = NULL, *p;
917 NYD_IN;
918
919 if ((p = mime_param_get("boundary", headerbody)) != NULL) {
920 uz i = su_cs_len(p);
921
922 if (len != NULL)
923 *len = i + 2;
924 q = n_autorec_alloc(i + 2 +1);
925 q[0] = q[1] = '-';
926 su_mem_copy(q + 2, p, i);
927 *(q + i + 2) = '\0';
928 }
929 NYD_OU;
930 return q;
931 }
932
933 FL char *
mime_param_boundary_create(void)934 mime_param_boundary_create(void)
935 {
936 static u32 reprocnt;
937 char *bp;
938 NYD_IN;
939
940 bp = n_autorec_alloc(36 + 6 +1);
941 bp[0] = bp[2] = bp[39] = bp[41] = '=';
942 bp[1] = bp[40] = '-';
943 su_mem_copy(bp + 3, mx_random_create_cp(36, &reprocnt), 36);
944 bp[42] = '\0';
945 NYD_OU;
946 return bp;
947 }
948
949 #include "su/code-ou.h"
950 /* s-it-mode */
951