1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Content-Transfer-Encodings as defined in RFC 2045 (and, for the _header()
 *@ versions, the "encoded word" rules of RFC 2047):
4  *@ - Quoted-Printable, section 6.7
5  *@ - Base64, section 6.8
6  *@ QP quoting and _b64_decode(), b64_encode() inspired from NetBSDs mailx(1):
7  *@   $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $
8  *@ TODO We have no notion of a "current message context" and thus badly log.
9  *@ TODO This is not final yet, v15 will bring "filters".
10  *
11  * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12  * SPDX-License-Identifier: ISC
13  *
14  * Permission to use, copy, modify, and/or distribute this software for any
15  * purpose with or without fee is hereby granted, provided that the above
16  * copyright notice and this permission notice appear in all copies.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
19  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
20  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
21  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
24  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25  */
26 #undef su_FILE
27 #define su_FILE mime_enc
28 #define mx_SOURCE
29 
30 #ifndef mx_HAVE_AMALGAMATION
31 # include "mx/nail.h"
32 #endif
33 
34 #include <su/cs.h>
35 #include <su/mem.h>
36 
37 /* TODO fake */
38 #include "su/code-in.h"
39 
/* Quoting actions: these are the entries of the lookup tables and the values
 * returned by a_me_mustquote().  Actions that stand for exactly one special
 * character carry the value of that character */
enum a_me_qact{
   /* Generic actions */
   a_ME_N = 0,          /* No quoting necessary */
   a_ME_Q = 1,          /* Must quote */
   a_ME_SP = 2,         /* sp */
   a_ME_XF = 3,         /* Special character 'F' - maybe quoted */
   a_ME_XD = 4,         /* Special character '.' - maybe quoted */
   a_ME_UU = 5,         /* In header, _ must be quoted in encoded word */

   /* Aliases which make the tables more readable */
   a_ME_NL = a_ME_N,    /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q,    /* Always quote a '\r' (CR) */

   /* Character valued actions */
   a_ME_HT = '\t',      /* Body HT=SP.  Head HT=HT, BUT quote in encoded word */
   a_ME_EQ = '=',       /* In header, '=' must be quoted in encoded word */
   a_ME_QM = '?',       /* In header, special character ? not always quoted */
   a_ME_US = '_'        /* In header, ' ' must be quoted as _ in encoded word */
};
54 
55 /* Lookup tables to decide whether a character must be encoded or not.
56  * Email header differences according to RFC 2047, section 4.2:
57  * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
58  * - don't care about the special ^F[rom] and ^.$ */
59 static u8 const a_me_qp_body[] = {
60     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
61     a_ME_Q, a_ME_SP, a_ME_NL,  a_ME_Q,  a_ME_Q, a_ME_CR,  a_ME_Q,  a_ME_Q,
62     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
63     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
64    a_ME_SP,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
65     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_XD,  a_ME_N,
66     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
67     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,  a_ME_N,  a_ME_N,
68 
69     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_XF,  a_ME_N,
70     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
71     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
72     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
73     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
74     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
75     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
76     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,
77 }, a_me_qp_head[] = {
78     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
79     a_ME_Q, a_ME_HT,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
80     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
81     a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
82    a_ME_US,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
83     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
84     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
85     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_EQ,  a_ME_N, a_ME_QM,
86 
87     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
88     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
89     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
90     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_UU,
91     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
92     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
93     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
94     a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,
95 };
96 
/* Base64 alphabet of RFC 2045, section 6.8, indexed by the 6-bit value */
static char const a_me_b64_enctbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";

/* Reverse mapping for the 7-bit range: the 6-bit value of a member of the
 * alphabet, -2 for the padding character '=', and -1 for anything else.
 * The decoding table is only accessed via a_ME_B64_DECUI8() */
static signed char const a_me_b64__dectbl[] = {
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Results of a_ME_B64_DECUI8() for '=' and for invalid input, respectively.
 * (Fully parenthesized so that they expand safely in any expression) */
#define a_ME_B64_EQU ((u32)-2)
#define a_ME_B64_BAD ((u32)-1)

/* Map the character C to its 6-bit value, to a_ME_B64_EQU or to a_ME_B64_BAD;
 * bytes outside of the table (>= 0x80) are a_ME_B64_BAD */
#define a_ME_B64_DECUI8(C) \
   ((u8)(C) >= sizeof(a_me_b64__dectbl)\
    ? a_ME_B64_BAD : (u32)a_me_b64__dectbl[(u8)(C)])
115 
/* Names of the Content-Transfer-Encodings, NUL separated in one buffer, and
 * the offset and length (excluding the NUL) of each name therein.
 * Each offset is derived from its predecessor so that the layout of the
 * buffer and the enumeration cannot silently drift apart.
 * (Ugly to place an enum here) */
static char const a_me_ctes[] =
      "7bit\0" "8bit\0" "base64\0" "quoted-printable\0" "binary\0"
      /* abbrevs */ "8b\0" "b64\0" "qp\0";

enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0,
   a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = a_ME_CTES_7B_OFF + a_ME_CTES_7B_LEN + 1,
   a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = a_ME_CTES_8B_OFF + a_ME_CTES_8B_LEN + 1,
   a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = a_ME_CTES_B64_OFF + a_ME_CTES_B64_LEN + 1,
   a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = a_ME_CTES_QP_OFF + a_ME_CTES_QP_LEN + 1,
   a_ME_CTES_BIN_LEN = 6,

   /* Abbreviations */
   a_ME_CTES_S8B_OFF = a_ME_CTES_BIN_OFF + a_ME_CTES_BIN_LEN + 1,
   a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = a_ME_CTES_S8B_OFF + a_ME_CTES_S8B_LEN + 1,
   a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = a_ME_CTES_SB64_OFF + a_ME_CTES_SB64_LEN + 1,
   a_ME_CTES_SQP_LEN = 2
};
131 
132 /* Check whether *s must be quoted according to flags, else body rules;
133  * sol indicates whether we are at the first character of a line/field */
134 su_SINLINE enum a_me_qact a_me_mustquote(char const *s, char const *e,
135                            boole sol, enum mime_enc_flags flags);
136 
137 /* Trim WS and make work point to the decodable range of in.
138  * Return the amount of bytes a b64_decode operation on that buffer requires,
139  * or UZ_MAX on overflow error */
140 static uz a_me_b64_decode_prepare(struct str *work, struct str const *in);
141 
142 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
143  * Return number of useful bytes in out or -1 on error.
144  * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
145 static sz a_me_b64_decode(struct str *out, struct str *in);
146 
147 su_SINLINE enum a_me_qact
a_me_mustquote(char const * s,char const * e,boole sol,enum mime_enc_flags flags)148 a_me_mustquote(char const *s, char const *e, boole sol,
149       enum mime_enc_flags flags){
150    u8 const *qtab;
151    enum a_me_qact a, r;
152    NYD2_IN;
153 
154    qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
155          ? a_me_qp_head : a_me_qp_body;
156 
157    if((u8)*s > 0x7F){
158       r = a_ME_Q;
159       goto jleave;
160    }
161 
162    a = qtab[(u8)*s];
163 
164    if((r = a) == a_ME_N || a == a_ME_Q)
165       goto jleave;
166 
167    r = a_ME_Q;
168 
169    /* Special header fields */
170    if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
171       /* Special massage for encoded words */
172       if(flags & MIMEEF_ISENCWORD){
173          switch(a){
174          case a_ME_HT:
175          case a_ME_US:
176          case a_ME_EQ:
177             r = a;
178             /* FALLTHRU */
179          case a_ME_UU:
180             goto jleave;
181          default:
182             break;
183          }
184       }
185 
186       /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
187        * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
188        * should be hard to match */
189       if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
190          goto jleave;
191       goto jnquote;
192    }
193 
194    /* Body-only */
195 
196    if(a == a_ME_SP){
197       /* WS only if trailing white space */
198       if(&s[1] == e || s[1] == '\n')
199          goto jleave;
200       goto jnquote;
201    }
202 
203    /* Rest are special begin-of-line cases */
204    if(!sol)
205       goto jnquote;
206 
207    /* ^From */
208    if(a == a_ME_XF){
209       if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
210          goto jleave;
211       goto jnquote;
212    }
213    /* ^.$ */
214    if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
215       goto jleave;
216 jnquote:
217    r = 0;
218 jleave:
219    NYD2_OU;
220    return r;
221 }
222 
223 static uz
a_me_b64_decode_prepare(struct str * work,struct str const * in)224 a_me_b64_decode_prepare(struct str *work, struct str const *in){
225    uz cp_len;
226    NYD2_IN;
227 
228    *work = *in;
229    cp_len = n_str_trim(work, n_STR_TRIM_BOTH)->l;
230 
231    if(cp_len > 16){
232       /* su_ERR_OVERFLOW */
233       if(UZ_MAX / 3 <= cp_len){
234          cp_len = UZ_MAX;
235          goto jleave;
236       }
237       cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
238    }
239    cp_len += (2 * 3) +1;
240 jleave:
241    NYD2_OU;
242    return cp_len;
243 }
244 
245 static sz
a_me_b64_decode(struct str * out,struct str * in)246 a_me_b64_decode(struct str *out, struct str *in){
247    u8 *p, pb;
248    u8 const *q, *end;
249    sz rv;
250    NYD2_IN;
251 
252    rv = -1;
253    p = (u8*)&out->s[out->l];
254    q = (u8 const*)in->s;
255 
256    for(end = &q[in->l]; P2UZ(end - q) >= 4; q += 4){
257       u32 a, b, c, d;
258 
259       a = a_ME_B64_DECUI8(q[0]);
260       b = a_ME_B64_DECUI8(q[1]);
261       c = a_ME_B64_DECUI8(q[2]);
262       d = a_ME_B64_DECUI8(q[3]);
263 
264       if(UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
265             c == a_ME_B64_BAD || d == a_ME_B64_BAD))
266          goto jleave;
267 
268       pb = ((a << 2) | ((b & 0x30) >> 4));
269       if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
270          *p++ = pb;
271 
272       if(c == a_ME_B64_EQU){ /* got '=' */
273          q += 4;
274          if(UNLIKELY(d != a_ME_B64_EQU))
275             goto jleave;
276          break;
277       }
278 
279       pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
280       if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
281          *p++ = pb;
282 
283       if(d == a_ME_B64_EQU) /* got '=' */
284          break;
285       pb = (((c & 0x03) << 6) | d);
286       if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
287          *p++ = pb;
288    }
289    rv ^= rv;
290 
291 jleave:{
292       uz i;
293 
294       i = P2UZ((char*)p - out->s);
295       out->l = i;
296       if(rv == 0)
297          rv = (sz)i;
298    }
299    in->l -= P2UZ(q - (u8*)in->s);
300    in->s = n_UNCONST(q);
301    NYD2_OU;
302    return rv;
303 }
304 
305 FL enum mime_enc
mime_enc_target(void)306 mime_enc_target(void){
307    char const *cp, *v15;
308    enum mime_enc rv;
309    NYD2_IN;
310 
311    if((v15 = ok_vlook(encoding)) != NULL)
312       n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
313 
314    if((cp = ok_vlook(mime_encoding)) == NULL && (cp = v15) == NULL)
315       rv = MIME_DEFAULT_ENCODING;
316    else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
317          !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
318       rv = MIMEE_8B;
319    else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
320          !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
321       rv = MIMEE_B64;
322    else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
323          !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
324       rv = MIMEE_QP;
325    else{
326       n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp);
327       rv = MIMEE_B64;
328    }
329    NYD2_OU;
330    return rv;
331 }
332 
333 FL enum mime_enc
mime_enc_from_ctehead(char const * hbody)334 mime_enc_from_ctehead(char const *hbody){
335    enum mime_enc rv;
336    NYD2_IN;
337 
338    if(hbody == NULL)
339       rv = MIMEE_7B;
340    else{
341       struct{
342          u8 off;
343          u8 len;
344          u8 enc;
345          u8 __dummy;
346       } const *cte, cte_base[] = {
347          {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
348          {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
349          {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
350          {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
351          {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
352          {0, 0, MIMEE_NONE, 0}
353       };
354       union {char const *s; uz l;} u;
355 
356       if(*hbody == '"')
357          for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
358             ;
359       else
360          for(u.s = hbody; *u.s != '\0' && !su_cs_is_white(*u.s); ++u.s)
361             ;
362       u.l = P2UZ(u.s - hbody);
363 
364       for(cte = cte_base;;)
365          if(cte->len == u.l && !su_cs_cmp_case(&a_me_ctes[cte->off], hbody)){
366             rv = cte->enc;
367             break;
368          }else if((++cte)->enc == MIMEE_NONE){
369             rv = MIMEE_NONE;
370             break;
371          }
372    }
373    NYD2_OU;
374    return rv;
375 }
376 
377 FL char const *
mime_enc_from_conversion(enum conversion const convert)378 mime_enc_from_conversion(enum conversion const convert){
379    char const *rv;
380    NYD2_IN;
381 
382    switch(convert){
383    case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
384    case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
385    case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
386    case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
387    case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
388    default: rv = n_empty; break;
389    }
390    NYD2_OU;
391    return rv;
392 }
393 
394 FL uz
mime_enc_mustquote(char const * ln,uz lnlen,enum mime_enc_flags flags)395 mime_enc_mustquote(char const *ln, uz lnlen, enum mime_enc_flags flags){
396    uz rv;
397    boole sol;
398    NYD2_IN;
399 
400    for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
401       switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
402       case a_ME_US:
403       case a_ME_EQ:
404       case a_ME_HT:
405          ASSERT(flags & MIMEEF_ISENCWORD);
406          /* FALLTHRU */
407       case 0:
408          continue;
409       default:
410          ++rv;
411       }
412    NYD2_OU;
413    return rv;
414 }
415 
416 FL uz
qp_encode_calc_size(uz len)417 qp_encode_calc_size(uz len){
418    uz bytes, lines;
419    NYD2_IN;
420 
421    /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
422     * However, we must be aware that (a) the output may span multiple lines
423     * and (b) the input does not end with a newline itself (nonetheless):
424     *    LC_ALL=C awk 'BEGIN{
425     *       for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
426     *    }' |
427     *    s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
428 
429    /* Several su_ERR_OVERFLOW */
430    if(len >= UZ_MAX / 3){
431       len = UZ_MAX;
432       goto jleave;
433    }
434    bytes = len * 3;
435    lines = bytes / QP_LINESIZE;
436    len += lines;
437 
438    if(len >= UZ_MAX / 3){
439       len = UZ_MAX;
440       goto jleave;
441    }
442    /* Trailing hard NL may be missing, so there may be two lines.
443     * Thus add soft + hard NL per line and a trailing NUL */
444    bytes = len * 3;
445    lines = (bytes / QP_LINESIZE) + 1;
446    lines <<= 1;
447    ++bytes;
448    /*if(UZ_MAX - bytes >= lines){
449       len = UZ_MAX;
450       goto jleave;
451    }*/
452    bytes += lines;
453    len = bytes;
454 jleave:
455    NYD2_OU;
456    return len;
457 }
458 
459 #ifdef notyet
460 FL struct str *
qp_encode_cp(struct str * out,char const * cp,enum qpflags flags)461 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
462    struct str in;
463    NYD_IN;
464 
465    in.s = n_UNCONST(cp);
466    in.l = su_cs_len(cp);
467    out = qp_encode(out, &in, flags);
468    NYD_OU;
469    return out;
470 }
471 
472 FL struct str *
qp_encode_buf(struct str * out,void const * vp,uz vp_len,enum qpflags flags)473 qp_encode_buf(struct str *out, void const *vp, uz vp_len,
474       enum qpflags flags){
475    struct str in;
476    NYD_IN;
477 
478    in.s = n_UNCONST(vp);
479    in.l = vp_len;
480    out = qp_encode(out, &in, flags);
481    NYD_OU;
482    return out;
483 }
484 #endif /* notyet */
485 
486 FL struct str *
qp_encode(struct str * out,struct str const * in,enum qpflags flags)487 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
488    uz lnlen;
489    char *qp;
490    char const *is, *ie;
491    boole sol, seenx;
492    NYD_IN;
493 
494    sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
495 
496    if(!(flags & QP_BUF)){
497       if((lnlen = qp_encode_calc_size(in->l)) == UZ_MAX){
498          out = NULL;
499          goto jerr;
500       }
501       out->s = (flags & QP_SALLOC) ? n_autorec_alloc(lnlen)
502             : n_realloc(out->s, lnlen);
503    }
504    qp = out->s;
505    is = in->s;
506    ie = is + in->l;
507 
508    if(flags & QP_ISHEAD){
509       enum mime_enc_flags ef;
510 
511       ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
512 
513       for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
514          char c;
515          enum a_me_qact mq;
516 
517          mq = a_me_mustquote(is, ie, sol, ef);
518          c = *is++;
519 
520          if(mq == a_ME_N){
521             /* We convert into a single *encoded-word*, that'll end up in
522              * =?C?Q??=; quote '?' from when we're inside there on */
523             if(seenx && c == '?')
524                goto jheadq;
525             *qp = c;
526          }else if(mq == a_ME_US)
527             *qp = a_ME_US;
528          else{
529             seenx = TRU1;
530 jheadq:
531             *qp++ = '=';
532             qp = n_c_to_hex_base16(qp, c) + 1;
533          }
534       }
535       goto jleave;
536    }
537 
538    /* The body needs to take care for soft line breaks etc. */
539    for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
540       char c;
541       enum a_me_qact mq;
542 
543       mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
544       c = *is++;
545 
546       if(mq == a_ME_N && (c != '\n' || !seenx)){
547          *qp++ = c;
548          if(++lnlen < QP_LINESIZE - 1)
549             continue;
550          /* Don't write a soft line break when we're in the last possible
551           * column and either an LF has been written or only an LF follows, as
552           * that'll end the line anyway */
553          /* XXX but - ensure is+1>=ie, then??
554           * xxx and/or - what about resetting lnlen; that contra
555           * xxx dicts input==1 input line ASSERTion, though */
556          if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
557             continue;
558 jsoftnl:
559          qp[0] = '=';
560          qp[1] = '\n';
561          qp += 2;
562          lnlen = 0;
563          continue;
564       }
565 
566       if(lnlen > QP_LINESIZE - 3 - 1){
567          qp[0] = '=';
568          qp[1] = '\n';
569          qp += 2;
570          lnlen = 0;
571       }
572       *qp++ = '=';
573       qp = n_c_to_hex_base16(qp, c);
574       qp += 2;
575       lnlen += 3;
576       if(c != '\n' || !seenx)
577          seenx = (c == '\r');
578       else{
579          seenx = FAL0;
580          goto jsoftnl;
581       }
582    }
583 
584    /* Enforce soft line break if we haven't seen LF */
585    if(in->l > 0 && *--is != '\n'){
586       qp[0] = '=';
587       qp[1] = '\n';
588       qp += 2;
589    }
590 jleave:
591    out->l = P2UZ(qp - out->s);
592    out->s[out->l] = '\0';
593 jerr:
594    NYD_OU;
595    return out;
596 }
597 
598 FL boole
qp_decode_header(struct str * out,struct str const * in)599 qp_decode_header(struct str *out, struct str const *in){
600    struct n_string s;
601    char const *is, *ie;
602    NYD_IN;
603 
604    /* su_ERR_OVERFLOW */
605    if(UZ_MAX -1 - out->l <= in->l ||
606          S32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
607       out->l = 0;
608       out = NULL;
609       goto jleave;
610    }
611 
612    n_string_creat(&s);
613    n_string_reserve(n_string_take_ownership(&s, out->s,
614          (out->l == 0 ? 0 : out->l +1), out->l),
615       in->l + (in->l >> 2));
616 
617    for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
618       s32 c;
619 
620       c = *is++;
621       if(c == '='){
622          if(is >= ie){
623             goto jpushc; /* TODO According to RFC 2045, 6.7,
624             * ++is; TODO we should warn the user, but have no context
625             * goto jehead; TODO to do so; can't over and over */
626          }else if((c = n_c_from_hex_base16(is)) >= 0){
627             is += 2;
628             goto jpushc;
629          }else{
630             /* Invalid according to RFC 2045, section 6.7 */
631             /* TODO Follow RFC 2045, 6.7 advise and simply put through */
632             c = '=';
633             goto jpushc;
634 /* TODO jehead:
635  * TODO      if(n_psonce & n_PSO_UNICODE)
636  *              n_string_push_buf(&s, su_utf_8_replacer,
637  *                 sizeof(su_utf_8_replacer) -1);
638  * TODO       else{
639  * TODO          c = '?';
640  * TODO          goto jpushc;
641  * TODO       }*/
642          }
643       }else{
644          if(c == '_' /* a_ME_US */)
645             c = ' ';
646 jpushc:
647          n_string_push_c(&s, S(char,c));
648       }
649    }
650 
651    out->s = n_string_cp(&s);
652    out->l = s.s_len;
653    n_string_gut(n_string_drop_ownership(&s));
654 jleave:
655    NYD_OU;
656    return (out != NULL);
657 }
658 
659 FL boole
qp_decode_part(struct str * out,struct str const * in,struct str * outrest,struct str * inrest_or_null)660 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
661       struct str *inrest_or_null){
662    struct n_string s_b, *s;
663    char const *is, *ie;
664    NYD_IN;
665 
666    if(outrest->l != 0){
667       is = out->s;
668       *out = *outrest;
669       outrest->s = n_UNCONST(is);
670       outrest->l = 0;
671    }
672 
673    /* su_ERR_OVERFLOW */
674    if(UZ_MAX -1 - out->l <= in->l ||
675          S32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
676       goto jerr;
677 
678    s = n_string_creat(&s_b);
679    s = n_string_take_ownership(s, out->s,
680          (out->l == 0 ? 0 : out->l +1), out->l);
681    s = n_string_reserve(s, in->l + (in->l >> 2));
682 
683    for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
684       s32 c;
685 
686       if((c = *is++) != '='){
687 jpushc:
688          n_string_push_c(s, (char)c);
689          continue;
690       }
691 
692       /* RFC 2045, 6.7:
693        *   Therefore, when decoding a Quoted-Printable body, any
694        *   trailing white space on a line must be deleted, as it will
695        *   necessarily have been added by intermediate transport
696        *   agents */
697       for(; is <= ie && su_cs_is_blank(*is); ++is)
698          ;
699       if(is >= ie){
700          /* Soft line break? */
701          if(*is == '\n')
702             goto jsoftnl;
703         goto jpushc; /* TODO According to RFC 2045, 6.7,
704          * ++is; TODO we should warn the user, but have no context
705          * goto jebody; TODO to do so; can't over and over */
706       }
707 
708       /* Not a soft line break? */
709       if(*is != '\n'){
710          if((c = n_c_from_hex_base16(is)) >= 0){
711             is += 2;
712             goto jpushc;
713          }
714          /* Invalid according to RFC 2045, section 6.7 */
715          /* TODO Follow RFC 2045, 6.7 advise and simply put through */
716          c = '=';
717          goto jpushc;
718 /* TODO jebody:
719  * TODO   if(n_psonce & n_PSO_UNICODE)
720  *           n_string_push_buf(&s, su_utf_8_replacer,
721  *              sizeof(su_utf_8_replacer) -1);
722  * TODO    else{
723  * TODO       c = '?';
724  * TODO       goto jpushc;
725  * TODO    }*/
726       }
727 
728       /* CRLF line endings are encoded as QP, followed by a soft line break, so
729        * check for this special case, and simply forget we have seen one, so as
730        * not to end up with the entire DOS file in a contiguous buffer */
731 jsoftnl:
732       if(s->s_len > 0 && s->s_dat[s->s_len - 1] == '\n'){
733 #if 0       /* TODO qp_decode_part() we do not normalize CRLF
734           * TODO to LF because for that we would need
735           * TODO to know if we are about to write to
736           * TODO the display or do save the file!
737           * TODO 'hope the MIME/send layer rewrite will
738           * TODO offer the possibility to DTRT */
739          if(s->s_len > 1 && s->s_dat[s->s_len - 2] == '\r')
740             n_string_push_c(n_string_trunc(s, s->s_len - 2), '\n');
741 #endif
742          break;
743       }
744 
745       /* C99 */{
746          char *cp;
747          uz l;
748 
749          if((l = P2UZ(ie - is)) > 0){
750             if(inrest_or_null == NULL)
751                goto jerr;
752             n_str_assign_buf(inrest_or_null, is, l);
753          }
754          cp = outrest->s;
755          outrest->s = n_string_cp(s);
756          outrest->l = s->s_len;
757          n_string_drop_ownership(s);
758          if(cp != NULL)
759             n_free(cp);
760       }
761       break;
762    }
763 
764    out->s = n_string_cp(s);
765    out->l = s->s_len;
766    n_string_gut(n_string_drop_ownership(s));
767 jleave:
768    NYD_OU;
769    return (out != NULL);
770 jerr:
771    out->l = 0;
772    out = NULL;
773    goto jleave;
774 }
775 
776 FL uz
b64_encode_calc_size(uz len)777 b64_encode_calc_size(uz len){
778    NYD2_IN;
779    if(len >= UZ_MAX / 4)
780       len = UZ_MAX;
781    else{
782       len = (len * 4) / 3;
783       len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
784       len += 2 + 1; /* CRLF, \0 */
785    }
786    NYD2_OU;
787    return len;
788 }
789 
790 FL struct str *
b64_encode(struct str * out,struct str const * in,enum b64flags flags)791 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
792    u8 const *p;
793    uz i, lnlen;
794    char *b64;
795    NYD_IN;
796 
797    ASSERT(!(flags & B64_NOPAD) ||
798       !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
799 
800    p = (u8 const*)in->s;
801 
802    if(!(flags & B64_BUF)){
803       if((i = b64_encode_calc_size(in->l)) == UZ_MAX){
804          out = NULL;
805          goto jleave;
806       }
807       out->s = (flags & B64_SALLOC) ? n_autorec_alloc(i)
808             : n_realloc(out->s, i);
809    }
810    b64 = out->s;
811 
812    if(!(flags & (B64_CRLF | B64_LF)))
813       flags &= ~B64_MULTILINE;
814 
815    for(lnlen = 0, i = in->l; (sz)i > 0; p += 3, i -= 3){
816       u32 a, b, c;
817 
818       a = p[0];
819       b64[0] = a_me_b64_enctbl[a >> 2];
820 
821       switch(i){
822       case 1:
823          b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
824          b64[2] =
825          b64[3] = '=';
826          break;
827       case 2:
828          b = p[1];
829          b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
830          b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
831          b64[3] = '=';
832          break;
833       default:
834          b = p[1];
835          c = p[2];
836          b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
837          b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
838          b64[3] = a_me_b64_enctbl[c & 0x3F];
839          break;
840       }
841 
842       b64 += 4;
843       if(!(flags & B64_MULTILINE))
844          continue;
845       lnlen += 4;
846       if(lnlen < B64_LINESIZE)
847          continue;
848 
849       lnlen = 0;
850       if(flags & B64_CRLF)
851          *b64++ = '\r';
852       if(flags & (B64_CRLF | B64_LF))
853          *b64++ = '\n';
854    }
855 
856    if((flags & (B64_CRLF | B64_LF)) &&
857          (!(flags & B64_MULTILINE) || lnlen != 0)){
858       if(flags & B64_CRLF)
859          *b64++ = '\r';
860       if(flags & (B64_CRLF | B64_LF))
861          *b64++ = '\n';
862    }else if(flags & B64_NOPAD)
863       while(b64 != out->s && b64[-1] == '=')
864          --b64;
865 
866    out->l = P2UZ(b64 - out->s);
867    out->s[out->l] = '\0';
868 
869    /* Base64 includes + and /, replace them with _ and -.
870     * This is base64url according to RFC 4648, then.  Since we only support
871     * that for encoding and it is only used for boundary strings, this is
872     * yet a primitive implementation; xxx use tables; support decoding */
873    if(flags & B64_RFC4648URL){
874       char c;
875 
876       for(b64 = out->s; (c = *b64) != '\0'; ++b64)
877          if(c == '+')
878             *b64 = '-';
879          else if(c == '/')
880                *b64 = '_';
881    }
882 jleave:
883    NYD_OU;
884    return out;
885 }
886 
887 FL struct str *
b64_encode_buf(struct str * out,void const * vp,uz vp_len,enum b64flags flags)888 b64_encode_buf(struct str *out, void const *vp, uz vp_len,
889       enum b64flags flags){
890    struct str in;
891    NYD_IN;
892 
893    in.s = n_UNCONST(vp);
894    in.l = vp_len;
895    out = b64_encode(out, &in, flags);
896    NYD_OU;
897    return out;
898 }
899 
900 #ifdef notyet
901 FL struct str *
b64_encode_cp(struct str * out,char const * cp,enum b64flags flags)902 b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
903    struct str in;
904    NYD_IN;
905 
906    in.s = n_UNCONST(cp);
907    in.l = su_cs_len(cp);
908    out = b64_encode(out, &in, flags);
909    NYD_OU;
910    return out;
911 }
912 #endif /* notyet */
913 
914 FL boole
b64_decode(struct str * out,struct str const * in)915 b64_decode(struct str *out, struct str const *in){
916    struct str work;
917    uz len;
918    NYD_IN;
919 
920    out->l = 0;
921 
922    if((len = a_me_b64_decode_prepare(&work, in)) == UZ_MAX)
923       goto jerr;
924 
925    /* Ignore an empty input, as may happen for an empty final line */
926    if(work.l == 0)
927       out->s = n_realloc(out->s, 1);
928    else if(work.l >= 4 && !(work.l & 3)){
929       out->s = n_realloc(out->s, len +1);
930       if((sz)(len = a_me_b64_decode(out, &work)) < 0)
931          goto jerr;
932    }else
933       goto jerr;
934    out->s[out->l] = '\0';
935 jleave:
936    NYD_OU;
937    return (out != NULL);
938 jerr:
939    out = NULL;
940    goto jleave;
941 }
942 
943 FL boole
b64_decode_header(struct str * out,struct str const * in)944 b64_decode_header(struct str *out, struct str const *in){
945    struct str outr, inr;
946    NYD_IN;
947 
948    if(!b64_decode(out, in)){
949       su_mem_set(&outr, 0, sizeof outr);
950       su_mem_set(&inr, 0, sizeof inr);
951 
952       if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
953          out = NULL;
954 
955       if(inr.s != NULL)
956          n_free(inr.s);
957       if(outr.s != NULL)
958          n_free(outr.s);
959    }
960    NYD_OU;
961    return (out != NULL);
962 }
963 
964 FL boole
b64_decode_part(struct str * out,struct str const * in,struct str * outrest,struct str * inrest_or_null)965 b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
966       struct str *inrest_or_null){
967    struct str work, save;
968    u32 a, b, c, b64l;
969    char ca, cb, cc, cx;
970    struct n_string s, workbuf;
971    uz len;
972    NYD_IN;
973 
974    n_string_creat(&s);
975    if((len = out->l) > 0 && out->s[len] == '\0')
976       (void)n_string_take_ownership(&s, out->s, len +1, len);
977    else{
978       if(len > 0)
979          n_string_push_buf(&s, out->s, len);
980       if(out->s != NULL)
981          n_free(out->s);
982    }
983    out->s = NULL, out->l = 0;
984    n_string_creat(&workbuf);
985 
986    if((len = a_me_b64_decode_prepare(&work, in)) == UZ_MAX)
987       goto jerr;
988 
989    if(outrest->l > 0){
990       n_string_push_buf(&s, outrest->s, outrest->l);
991       outrest->l = 0;
992    }
993 
994    /* su_ERR_OVERFLOW */
995    if(UZ_MAX - len <= s.s_len ||
996          S32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
997       goto jerr;
998 
999    if(work.l == 0)
1000       goto jok;
1001 
1002    /* This text decoder is extremely expensive, especially given that in all
1003     * but _invalid_ cases it is not even needed!  So try once to do the normal
1004     * decoding, if that fails, go the hard way */
1005    save = work;
1006    out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;
1007 
1008    if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
1009       n_string_trunc(&s, out->l);
1010       if(work.l == 0)
1011          goto jok;
1012    }
1013 
1014    n_string_trunc(&s, b64l);
1015    work = save;
1016    out->s = NULL, out->l = 0;
1017 
1018    /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
1019     * TODO OpenSSL and mutt simply bail on such stuff */
1020    UNINIT(ca, 0); UNINIT(a, 0);
1021    UNINIT(cb, 0); UNINIT(b, 0);
1022    UNINIT(cc, 0); UNINIT(c, 0);
1023    for(b64l = 0;;){
1024       u32 x;
1025 
1026       x = a_ME_B64_DECUI8((u8)(cx = *work.s));
1027       switch(b64l){
1028       case 0:
1029          if(x >= a_ME_B64_EQU)
1030             goto jrepl;
1031          ca = cx;
1032          a = x;
1033          ++b64l;
1034          break;
1035       case 1:
1036          if(x >= a_ME_B64_EQU)
1037             goto jrepl;
1038          cb = cx;
1039          b = x;
1040          ++b64l;
1041          break;
1042       case 2:
1043          if(x == a_ME_B64_BAD)
1044             goto jrepl;
1045          cc = cx;
1046          c = x;
1047          ++b64l;
1048          break;
1049       case 3:
1050          if(x == a_ME_B64_BAD){
1051 jrepl:
1052             /* TODO This would be wrong since iconv(3) may be applied first! */
1053             n_err(_("Invalid base64 encoding ignored\n"));
1054 #if 0
1055             if(n_psonce & n_PSO_UNICODE)
1056                n_string_push_buf(&s, su_utf_8_replacer,
1057                   sizeof(su_utf_8_replacer) -1);
1058             else
1059                n_string_push_c(&s, '?');
1060 #endif
1061             ;
1062          }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
1063             /* This is not only invalid but bogus.  Skip it over! */
1064             /* TODO This would be wrong since iconv(3) may be applied first! */
1065             n_err(_("Illegal base64 encoding ignored\n"));
1066 #if 0
1067             n_string_push_buf(&s, su_UTF_8_REPLACER su_UTF_8_REPLACEMENT
1068                su_UTF_8_REPLACER su_UTF_8_REPLACEMENT,
1069                (sizeof(su_UTF_8_REPLACER) -1) * 4);
1070 #endif
1071             b64l = 0;
1072          }else{
1073             u8 pb;
1074 
1075             pb = ((a << 2) | ((b & 0x30) >> 4));
1076             if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1077                n_string_push_c(&s, (char)pb);
1078             pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
1079             if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1080                n_string_push_c(&s, (char)pb);
1081             if(x != a_ME_B64_EQU){
1082                pb = (((c & 0x03) << 6) | x);
1083                if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1084                   n_string_push_c(&s, (char)pb);
1085             }
1086             ++b64l;
1087          }
1088          break;
1089       }
1090 
1091       ++work.s;
1092       if(--work.l == 0){
1093          if(b64l > 0 && b64l != 4){
1094             if(inrest_or_null == NULL)
1095                goto jerr;
1096             inrest_or_null->s = n_realloc(inrest_or_null->s, b64l +1);
1097             inrest_or_null->s[0] = ca;
1098             if(b64l > 1)
1099                inrest_or_null->s[1] = cb;
1100             if(b64l > 2)
1101                inrest_or_null->s[2] = cc;
1102             inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
1103          }
1104          goto jok;
1105       }
1106       if(b64l == 4)
1107          b64l = 0;
1108    }
1109 
1110 jok:
1111    out->s = n_string_cp(&s);
1112    out->l = s.s_len;
1113    n_string_drop_ownership(&s);
1114 jleave:
1115    n_string_gut(&workbuf);
1116    n_string_gut(&s);
1117    NYD_OU;
1118    return (out != NULL);
1119 jerr:
1120    out = NULL;
1121    goto jleave;
1122 }
1123 
1124 #include "su/code-ou.h"
1125 /* s-it-mode */
1126