1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
 *@ for _header() versions: including "encoded word" as of RFC 2047):
4 *@ - Quoted-Printable, section 6.7
5 *@ - Base64, section 6.8
6 *@ QP quoting and _b64_decode(), b64_encode() inspired from NetBSDs mailx(1):
7 *@ $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $
8 *@ TODO We have no notion of a "current message context" and thus badly log.
9 *@ TODO This is not final yet, v15 will bring "filters".
10 *
11 * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 * SPDX-License-Identifier: ISC
13 *
14 * Permission to use, copy, modify, and/or distribute this software for any
15 * purpose with or without fee is hereby granted, provided that the above
16 * copyright notice and this permission notice appear in all copies.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
19 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
21 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
24 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25 */
26 #undef su_FILE
27 #define su_FILE mime_enc
28 #define mx_SOURCE
29
30 #ifndef mx_HAVE_AMALGAMATION
31 # include "mx/nail.h"
32 #endif
33
34 #include <su/cs.h>
35 #include <su/mem.h>
36
37 /* TODO fake */
38 #include "su/code-in.h"
39
/* Quoting classes: these are the values stored in the a_me_qp_body[] and
 * a_me_qp_head[] lookup tables, and the result type of a_me_mustquote().
 * The members with character values are equal to the character they are
 * named after, so they can be compared against / stored as that byte */
enum a_me_qact{
   a_ME_N = 0, /* No quoting necessary */
   a_ME_Q = 1, /* Must quote */
   a_ME_SP = 2, /* sp */
   a_ME_XF = 3, /* Special character 'F' - maybe quoted */
   a_ME_XD = 4, /* Special character '.' - maybe quoted */
   a_ME_UU = 5, /* In header, _ must be quoted in encoded word */
   a_ME_US = '_', /* In header, ' ' must be quoted as _ in encoded word */
   a_ME_QM = '?', /* In header, special character ? not always quoted */
   a_ME_EQ = '=', /* In header, '=' must be quoted in encoded word */
   a_ME_HT ='\t', /* Body HT=SP. Head HT=HT, BUT quote in encoded word */
   a_ME_NL = 0, /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q /* Always quote a '\r' (CR) */
};
54
/* Lookup tables to decide whether a character must be encoded or not.
 * Both tables have 128 entries, are indexed by the byte value and store
 * enum a_me_qact classes; a_me_mustquote() does not consult them for bytes
 * above 0x7F, these are always quoted.
 * Email header differences according to RFC 2047, section 4.2:
 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
 * - don't care about the special ^F[rom] and ^.$ */
static u8 const a_me_qp_body[] = {
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   /* HT (0x09), NL (0x0A) and CR (0x0D) are in this row */
   a_ME_Q, a_ME_SP, a_ME_NL, a_ME_Q, a_ME_Q, a_ME_CR, a_ME_Q, a_ME_Q,
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   /* SP (0x20) */
   a_ME_SP, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* '.' (0x2E) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XD, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* '=' (0x3D) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q, a_ME_N, a_ME_N,

   /* 'F' (0x46) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XF, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* DEL (0x7F) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
}, a_me_qp_head[] = {
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   /* HT (0x09); unlike in the body table NL and CR are plain a_ME_Q */
   a_ME_Q, a_ME_HT, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
   /* SP (0x20) */
   a_ME_US, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* '=' (0x3D) and '?' (0x3F) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_EQ, a_ME_N, a_ME_QM,

   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* '_' (0x5F) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_UU,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
   /* DEL (0x7F) */
   a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
};
96
/* The Base64 alphabet, indexed by the 6-bit value to encode.
 * The decoding table is only accessed via a_ME_B64_DECUI8() */
static char const a_me_b64_enctbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";
/* Reverse mapping for the byte values 0..127: the 6-bit value of an alphabet
 * member, -2 for the padding character '=', and -1 for anything else */
static signed char const a_me_b64__dectbl[] = {
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
   15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
/* The table's -2 and -1 after conversion to u32; since these are the two
 * largest u32 values, "x >= a_ME_B64_EQU" tests for "padding or invalid" */
#define a_ME_B64_EQU (u32)-2
#define a_ME_B64_BAD (u32)-1
/* Decode one input byte: bytes beyond the table yield a_ME_B64_BAD, all
 * others the table entry converted to u32 */
#define a_ME_B64_DECUI8(C) \
   ((u8)(C) >= sizeof(a_me_b64__dectbl)\
    ? a_ME_B64_BAD : (u32)a_me_b64__dectbl[(u8)(C)])
115
/* (Ugly to place an enum here) */
/* All known Content-Transfer-Encoding names, plus abbreviations, packed into
 * one array as consecutive NUL terminated strings */
static char const a_me_ctes[] = "7bit\0" "8bit\0" \
      "base64\0" "quoted-printable\0" "binary\0" \
      /* abbrevs */ "8b\0" "b64\0" "qp\0";
/* Byte offset (_OFF) and length without the NUL (_LEN) of each name within
 * a_me_ctes[]; these must be kept in sync with the string by hand */
enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0, a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = 5, a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = 10, a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = 17, a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = 34, a_ME_CTES_BIN_LEN = 6,

   /* The abbreviations */
   a_ME_CTES_S8B_OFF = 41, a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = 44, a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = 48, a_ME_CTES_SQP_LEN = 2
};
131
/* Check whether *s must be quoted according to flags, else body rules;
 * sol indicates whether we are at the first character of a line/field.
 * e is the end of the input (callers pass the address one behind the last
 * byte).  The result is 0 (a_ME_N) if no quoting is needed */
su_SINLINE enum a_me_qact a_me_mustquote(char const *s, char const *e,
      boole sol, enum mime_enc_flags flags);

/* Trim WS and make work point to the decodable range of in.
 * Return the amount of bytes a b64_decode operation on that buffer requires,
 * or UZ_MAX on overflow error.
 * work is assigned from in, that is: no copy of the data is made */
static uz a_me_b64_decode_prepare(struct str *work, struct str const *in);

/* Perform b64_decode on in(put) to sufficiently spaced out(put).
 * Return number of useful bytes in out or -1 on error.
 * Decoded bytes are stored starting at out->s[out->l], and out->l is updated;
 * in is advanced (also in the error case) to where decoding stopped.
 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
static sz a_me_b64_decode(struct str *out, struct str *in);
146
147 su_SINLINE enum a_me_qact
a_me_mustquote(char const * s,char const * e,boole sol,enum mime_enc_flags flags)148 a_me_mustquote(char const *s, char const *e, boole sol,
149 enum mime_enc_flags flags){
150 u8 const *qtab;
151 enum a_me_qact a, r;
152 NYD2_IN;
153
154 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
155 ? a_me_qp_head : a_me_qp_body;
156
157 if((u8)*s > 0x7F){
158 r = a_ME_Q;
159 goto jleave;
160 }
161
162 a = qtab[(u8)*s];
163
164 if((r = a) == a_ME_N || a == a_ME_Q)
165 goto jleave;
166
167 r = a_ME_Q;
168
169 /* Special header fields */
170 if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
171 /* Special massage for encoded words */
172 if(flags & MIMEEF_ISENCWORD){
173 switch(a){
174 case a_ME_HT:
175 case a_ME_US:
176 case a_ME_EQ:
177 r = a;
178 /* FALLTHRU */
179 case a_ME_UU:
180 goto jleave;
181 default:
182 break;
183 }
184 }
185
186 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
187 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
188 * should be hard to match */
189 if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
190 goto jleave;
191 goto jnquote;
192 }
193
194 /* Body-only */
195
196 if(a == a_ME_SP){
197 /* WS only if trailing white space */
198 if(&s[1] == e || s[1] == '\n')
199 goto jleave;
200 goto jnquote;
201 }
202
203 /* Rest are special begin-of-line cases */
204 if(!sol)
205 goto jnquote;
206
207 /* ^From */
208 if(a == a_ME_XF){
209 if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
210 goto jleave;
211 goto jnquote;
212 }
213 /* ^.$ */
214 if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
215 goto jleave;
216 jnquote:
217 r = 0;
218 jleave:
219 NYD2_OU;
220 return r;
221 }
222
223 static uz
a_me_b64_decode_prepare(struct str * work,struct str const * in)224 a_me_b64_decode_prepare(struct str *work, struct str const *in){
225 uz cp_len;
226 NYD2_IN;
227
228 *work = *in;
229 cp_len = n_str_trim(work, n_STR_TRIM_BOTH)->l;
230
231 if(cp_len > 16){
232 /* su_ERR_OVERFLOW */
233 if(UZ_MAX / 3 <= cp_len){
234 cp_len = UZ_MAX;
235 goto jleave;
236 }
237 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
238 }
239 cp_len += (2 * 3) +1;
240 jleave:
241 NYD2_OU;
242 return cp_len;
243 }
244
245 static sz
a_me_b64_decode(struct str * out,struct str * in)246 a_me_b64_decode(struct str *out, struct str *in){
247 u8 *p, pb;
248 u8 const *q, *end;
249 sz rv;
250 NYD2_IN;
251
252 rv = -1;
253 p = (u8*)&out->s[out->l];
254 q = (u8 const*)in->s;
255
256 for(end = &q[in->l]; P2UZ(end - q) >= 4; q += 4){
257 u32 a, b, c, d;
258
259 a = a_ME_B64_DECUI8(q[0]);
260 b = a_ME_B64_DECUI8(q[1]);
261 c = a_ME_B64_DECUI8(q[2]);
262 d = a_ME_B64_DECUI8(q[3]);
263
264 if(UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
265 c == a_ME_B64_BAD || d == a_ME_B64_BAD))
266 goto jleave;
267
268 pb = ((a << 2) | ((b & 0x30) >> 4));
269 if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
270 *p++ = pb;
271
272 if(c == a_ME_B64_EQU){ /* got '=' */
273 q += 4;
274 if(UNLIKELY(d != a_ME_B64_EQU))
275 goto jleave;
276 break;
277 }
278
279 pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
280 if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
281 *p++ = pb;
282
283 if(d == a_ME_B64_EQU) /* got '=' */
284 break;
285 pb = (((c & 0x03) << 6) | d);
286 if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
287 *p++ = pb;
288 }
289 rv ^= rv;
290
291 jleave:{
292 uz i;
293
294 i = P2UZ((char*)p - out->s);
295 out->l = i;
296 if(rv == 0)
297 rv = (sz)i;
298 }
299 in->l -= P2UZ(q - (u8*)in->s);
300 in->s = n_UNCONST(q);
301 NYD2_OU;
302 return rv;
303 }
304
305 FL enum mime_enc
mime_enc_target(void)306 mime_enc_target(void){
307 char const *cp, *v15;
308 enum mime_enc rv;
309 NYD2_IN;
310
311 if((v15 = ok_vlook(encoding)) != NULL)
312 n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
313
314 if((cp = ok_vlook(mime_encoding)) == NULL && (cp = v15) == NULL)
315 rv = MIME_DEFAULT_ENCODING;
316 else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
317 !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
318 rv = MIMEE_8B;
319 else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
320 !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
321 rv = MIMEE_B64;
322 else if(!su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
323 !su_cs_cmp_case(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
324 rv = MIMEE_QP;
325 else{
326 n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp);
327 rv = MIMEE_B64;
328 }
329 NYD2_OU;
330 return rv;
331 }
332
333 FL enum mime_enc
mime_enc_from_ctehead(char const * hbody)334 mime_enc_from_ctehead(char const *hbody){
335 enum mime_enc rv;
336 NYD2_IN;
337
338 if(hbody == NULL)
339 rv = MIMEE_7B;
340 else{
341 struct{
342 u8 off;
343 u8 len;
344 u8 enc;
345 u8 __dummy;
346 } const *cte, cte_base[] = {
347 {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
348 {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
349 {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
350 {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
351 {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
352 {0, 0, MIMEE_NONE, 0}
353 };
354 union {char const *s; uz l;} u;
355
356 if(*hbody == '"')
357 for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
358 ;
359 else
360 for(u.s = hbody; *u.s != '\0' && !su_cs_is_white(*u.s); ++u.s)
361 ;
362 u.l = P2UZ(u.s - hbody);
363
364 for(cte = cte_base;;)
365 if(cte->len == u.l && !su_cs_cmp_case(&a_me_ctes[cte->off], hbody)){
366 rv = cte->enc;
367 break;
368 }else if((++cte)->enc == MIMEE_NONE){
369 rv = MIMEE_NONE;
370 break;
371 }
372 }
373 NYD2_OU;
374 return rv;
375 }
376
377 FL char const *
mime_enc_from_conversion(enum conversion const convert)378 mime_enc_from_conversion(enum conversion const convert){
379 char const *rv;
380 NYD2_IN;
381
382 switch(convert){
383 case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
384 case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
385 case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
386 case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
387 case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
388 default: rv = n_empty; break;
389 }
390 NYD2_OU;
391 return rv;
392 }
393
394 FL uz
mime_enc_mustquote(char const * ln,uz lnlen,enum mime_enc_flags flags)395 mime_enc_mustquote(char const *ln, uz lnlen, enum mime_enc_flags flags){
396 uz rv;
397 boole sol;
398 NYD2_IN;
399
400 for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
401 switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
402 case a_ME_US:
403 case a_ME_EQ:
404 case a_ME_HT:
405 ASSERT(flags & MIMEEF_ISENCWORD);
406 /* FALLTHRU */
407 case 0:
408 continue;
409 default:
410 ++rv;
411 }
412 NYD2_OU;
413 return rv;
414 }
415
416 FL uz
qp_encode_calc_size(uz len)417 qp_encode_calc_size(uz len){
418 uz bytes, lines;
419 NYD2_IN;
420
421 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
422 * However, we must be aware that (a) the output may span multiple lines
423 * and (b) the input does not end with a newline itself (nonetheless):
424 * LC_ALL=C awk 'BEGIN{
425 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
426 * }' |
427 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
428
429 /* Several su_ERR_OVERFLOW */
430 if(len >= UZ_MAX / 3){
431 len = UZ_MAX;
432 goto jleave;
433 }
434 bytes = len * 3;
435 lines = bytes / QP_LINESIZE;
436 len += lines;
437
438 if(len >= UZ_MAX / 3){
439 len = UZ_MAX;
440 goto jleave;
441 }
442 /* Trailing hard NL may be missing, so there may be two lines.
443 * Thus add soft + hard NL per line and a trailing NUL */
444 bytes = len * 3;
445 lines = (bytes / QP_LINESIZE) + 1;
446 lines <<= 1;
447 ++bytes;
448 /*if(UZ_MAX - bytes >= lines){
449 len = UZ_MAX;
450 goto jleave;
451 }*/
452 bytes += lines;
453 len = bytes;
454 jleave:
455 NYD2_OU;
456 return len;
457 }
458
#ifdef notyet
/* qp_encode() for the NUL terminated string cp */
FL struct str *
qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
   struct str src;
   struct str *rv;
   NYD_IN;

   src.l = su_cs_len(cp);
   src.s = n_UNCONST(cp);
   rv = qp_encode(out, &src, flags);

   NYD_OU;
   return rv;
}

/* qp_encode() for the vp_len bytes of the buffer vp */
FL struct str *
qp_encode_buf(struct str *out, void const *vp, uz vp_len,
      enum qpflags flags){
   struct str src;
   struct str *rv;
   NYD_IN;

   src.l = vp_len;
   src.s = n_UNCONST(vp);
   rv = qp_encode(out, &src, flags);

   NYD_OU;
   return rv;
}
#endif /* notyet */
485
486 FL struct str *
qp_encode(struct str * out,struct str const * in,enum qpflags flags)487 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
488 uz lnlen;
489 char *qp;
490 char const *is, *ie;
491 boole sol, seenx;
492 NYD_IN;
493
494 sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
495
496 if(!(flags & QP_BUF)){
497 if((lnlen = qp_encode_calc_size(in->l)) == UZ_MAX){
498 out = NULL;
499 goto jerr;
500 }
501 out->s = (flags & QP_SALLOC) ? n_autorec_alloc(lnlen)
502 : n_realloc(out->s, lnlen);
503 }
504 qp = out->s;
505 is = in->s;
506 ie = is + in->l;
507
508 if(flags & QP_ISHEAD){
509 enum mime_enc_flags ef;
510
511 ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
512
513 for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
514 char c;
515 enum a_me_qact mq;
516
517 mq = a_me_mustquote(is, ie, sol, ef);
518 c = *is++;
519
520 if(mq == a_ME_N){
521 /* We convert into a single *encoded-word*, that'll end up in
522 * =?C?Q??=; quote '?' from when we're inside there on */
523 if(seenx && c == '?')
524 goto jheadq;
525 *qp = c;
526 }else if(mq == a_ME_US)
527 *qp = a_ME_US;
528 else{
529 seenx = TRU1;
530 jheadq:
531 *qp++ = '=';
532 qp = n_c_to_hex_base16(qp, c) + 1;
533 }
534 }
535 goto jleave;
536 }
537
538 /* The body needs to take care for soft line breaks etc. */
539 for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
540 char c;
541 enum a_me_qact mq;
542
543 mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
544 c = *is++;
545
546 if(mq == a_ME_N && (c != '\n' || !seenx)){
547 *qp++ = c;
548 if(++lnlen < QP_LINESIZE - 1)
549 continue;
550 /* Don't write a soft line break when we're in the last possible
551 * column and either an LF has been written or only an LF follows, as
552 * that'll end the line anyway */
553 /* XXX but - ensure is+1>=ie, then??
554 * xxx and/or - what about resetting lnlen; that contra
555 * xxx dicts input==1 input line ASSERTion, though */
556 if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
557 continue;
558 jsoftnl:
559 qp[0] = '=';
560 qp[1] = '\n';
561 qp += 2;
562 lnlen = 0;
563 continue;
564 }
565
566 if(lnlen > QP_LINESIZE - 3 - 1){
567 qp[0] = '=';
568 qp[1] = '\n';
569 qp += 2;
570 lnlen = 0;
571 }
572 *qp++ = '=';
573 qp = n_c_to_hex_base16(qp, c);
574 qp += 2;
575 lnlen += 3;
576 if(c != '\n' || !seenx)
577 seenx = (c == '\r');
578 else{
579 seenx = FAL0;
580 goto jsoftnl;
581 }
582 }
583
584 /* Enforce soft line break if we haven't seen LF */
585 if(in->l > 0 && *--is != '\n'){
586 qp[0] = '=';
587 qp[1] = '\n';
588 qp += 2;
589 }
590 jleave:
591 out->l = P2UZ(qp - out->s);
592 out->s[out->l] = '\0';
593 jerr:
594 NYD_OU;
595 return out;
596 }
597
598 FL boole
qp_decode_header(struct str * out,struct str const * in)599 qp_decode_header(struct str *out, struct str const *in){
600 struct n_string s;
601 char const *is, *ie;
602 NYD_IN;
603
604 /* su_ERR_OVERFLOW */
605 if(UZ_MAX -1 - out->l <= in->l ||
606 S32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
607 out->l = 0;
608 out = NULL;
609 goto jleave;
610 }
611
612 n_string_creat(&s);
613 n_string_reserve(n_string_take_ownership(&s, out->s,
614 (out->l == 0 ? 0 : out->l +1), out->l),
615 in->l + (in->l >> 2));
616
617 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
618 s32 c;
619
620 c = *is++;
621 if(c == '='){
622 if(is >= ie){
623 goto jpushc; /* TODO According to RFC 2045, 6.7,
624 * ++is; TODO we should warn the user, but have no context
625 * goto jehead; TODO to do so; can't over and over */
626 }else if((c = n_c_from_hex_base16(is)) >= 0){
627 is += 2;
628 goto jpushc;
629 }else{
630 /* Invalid according to RFC 2045, section 6.7 */
631 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
632 c = '=';
633 goto jpushc;
634 /* TODO jehead:
635 * TODO if(n_psonce & n_PSO_UNICODE)
636 * n_string_push_buf(&s, su_utf_8_replacer,
637 * sizeof(su_utf_8_replacer) -1);
638 * TODO else{
639 * TODO c = '?';
640 * TODO goto jpushc;
641 * TODO }*/
642 }
643 }else{
644 if(c == '_' /* a_ME_US */)
645 c = ' ';
646 jpushc:
647 n_string_push_c(&s, S(char,c));
648 }
649 }
650
651 out->s = n_string_cp(&s);
652 out->l = s.s_len;
653 n_string_gut(n_string_drop_ownership(&s));
654 jleave:
655 NYD_OU;
656 return (out != NULL);
657 }
658
659 FL boole
qp_decode_part(struct str * out,struct str const * in,struct str * outrest,struct str * inrest_or_null)660 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
661 struct str *inrest_or_null){
662 struct n_string s_b, *s;
663 char const *is, *ie;
664 NYD_IN;
665
666 if(outrest->l != 0){
667 is = out->s;
668 *out = *outrest;
669 outrest->s = n_UNCONST(is);
670 outrest->l = 0;
671 }
672
673 /* su_ERR_OVERFLOW */
674 if(UZ_MAX -1 - out->l <= in->l ||
675 S32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
676 goto jerr;
677
678 s = n_string_creat(&s_b);
679 s = n_string_take_ownership(s, out->s,
680 (out->l == 0 ? 0 : out->l +1), out->l);
681 s = n_string_reserve(s, in->l + (in->l >> 2));
682
683 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
684 s32 c;
685
686 if((c = *is++) != '='){
687 jpushc:
688 n_string_push_c(s, (char)c);
689 continue;
690 }
691
692 /* RFC 2045, 6.7:
693 * Therefore, when decoding a Quoted-Printable body, any
694 * trailing white space on a line must be deleted, as it will
695 * necessarily have been added by intermediate transport
696 * agents */
697 for(; is <= ie && su_cs_is_blank(*is); ++is)
698 ;
699 if(is >= ie){
700 /* Soft line break? */
701 if(*is == '\n')
702 goto jsoftnl;
703 goto jpushc; /* TODO According to RFC 2045, 6.7,
704 * ++is; TODO we should warn the user, but have no context
705 * goto jebody; TODO to do so; can't over and over */
706 }
707
708 /* Not a soft line break? */
709 if(*is != '\n'){
710 if((c = n_c_from_hex_base16(is)) >= 0){
711 is += 2;
712 goto jpushc;
713 }
714 /* Invalid according to RFC 2045, section 6.7 */
715 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
716 c = '=';
717 goto jpushc;
718 /* TODO jebody:
719 * TODO if(n_psonce & n_PSO_UNICODE)
720 * n_string_push_buf(&s, su_utf_8_replacer,
721 * sizeof(su_utf_8_replacer) -1);
722 * TODO else{
723 * TODO c = '?';
724 * TODO goto jpushc;
725 * TODO }*/
726 }
727
728 /* CRLF line endings are encoded as QP, followed by a soft line break, so
729 * check for this special case, and simply forget we have seen one, so as
730 * not to end up with the entire DOS file in a contiguous buffer */
731 jsoftnl:
732 if(s->s_len > 0 && s->s_dat[s->s_len - 1] == '\n'){
733 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
734 * TODO to LF because for that we would need
735 * TODO to know if we are about to write to
736 * TODO the display or do save the file!
737 * TODO 'hope the MIME/send layer rewrite will
738 * TODO offer the possibility to DTRT */
739 if(s->s_len > 1 && s->s_dat[s->s_len - 2] == '\r')
740 n_string_push_c(n_string_trunc(s, s->s_len - 2), '\n');
741 #endif
742 break;
743 }
744
745 /* C99 */{
746 char *cp;
747 uz l;
748
749 if((l = P2UZ(ie - is)) > 0){
750 if(inrest_or_null == NULL)
751 goto jerr;
752 n_str_assign_buf(inrest_or_null, is, l);
753 }
754 cp = outrest->s;
755 outrest->s = n_string_cp(s);
756 outrest->l = s->s_len;
757 n_string_drop_ownership(s);
758 if(cp != NULL)
759 n_free(cp);
760 }
761 break;
762 }
763
764 out->s = n_string_cp(s);
765 out->l = s->s_len;
766 n_string_gut(n_string_drop_ownership(s));
767 jleave:
768 NYD_OU;
769 return (out != NULL);
770 jerr:
771 out->l = 0;
772 out = NULL;
773 goto jleave;
774 }
775
776 FL uz
b64_encode_calc_size(uz len)777 b64_encode_calc_size(uz len){
778 NYD2_IN;
779 if(len >= UZ_MAX / 4)
780 len = UZ_MAX;
781 else{
782 len = (len * 4) / 3;
783 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
784 len += 2 + 1; /* CRLF, \0 */
785 }
786 NYD2_OU;
787 return len;
788 }
789
790 FL struct str *
b64_encode(struct str * out,struct str const * in,enum b64flags flags)791 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
792 u8 const *p;
793 uz i, lnlen;
794 char *b64;
795 NYD_IN;
796
797 ASSERT(!(flags & B64_NOPAD) ||
798 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
799
800 p = (u8 const*)in->s;
801
802 if(!(flags & B64_BUF)){
803 if((i = b64_encode_calc_size(in->l)) == UZ_MAX){
804 out = NULL;
805 goto jleave;
806 }
807 out->s = (flags & B64_SALLOC) ? n_autorec_alloc(i)
808 : n_realloc(out->s, i);
809 }
810 b64 = out->s;
811
812 if(!(flags & (B64_CRLF | B64_LF)))
813 flags &= ~B64_MULTILINE;
814
815 for(lnlen = 0, i = in->l; (sz)i > 0; p += 3, i -= 3){
816 u32 a, b, c;
817
818 a = p[0];
819 b64[0] = a_me_b64_enctbl[a >> 2];
820
821 switch(i){
822 case 1:
823 b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
824 b64[2] =
825 b64[3] = '=';
826 break;
827 case 2:
828 b = p[1];
829 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
830 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
831 b64[3] = '=';
832 break;
833 default:
834 b = p[1];
835 c = p[2];
836 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
837 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
838 b64[3] = a_me_b64_enctbl[c & 0x3F];
839 break;
840 }
841
842 b64 += 4;
843 if(!(flags & B64_MULTILINE))
844 continue;
845 lnlen += 4;
846 if(lnlen < B64_LINESIZE)
847 continue;
848
849 lnlen = 0;
850 if(flags & B64_CRLF)
851 *b64++ = '\r';
852 if(flags & (B64_CRLF | B64_LF))
853 *b64++ = '\n';
854 }
855
856 if((flags & (B64_CRLF | B64_LF)) &&
857 (!(flags & B64_MULTILINE) || lnlen != 0)){
858 if(flags & B64_CRLF)
859 *b64++ = '\r';
860 if(flags & (B64_CRLF | B64_LF))
861 *b64++ = '\n';
862 }else if(flags & B64_NOPAD)
863 while(b64 != out->s && b64[-1] == '=')
864 --b64;
865
866 out->l = P2UZ(b64 - out->s);
867 out->s[out->l] = '\0';
868
869 /* Base64 includes + and /, replace them with _ and -.
870 * This is base64url according to RFC 4648, then. Since we only support
871 * that for encoding and it is only used for boundary strings, this is
872 * yet a primitive implementation; xxx use tables; support decoding */
873 if(flags & B64_RFC4648URL){
874 char c;
875
876 for(b64 = out->s; (c = *b64) != '\0'; ++b64)
877 if(c == '+')
878 *b64 = '-';
879 else if(c == '/')
880 *b64 = '_';
881 }
882 jleave:
883 NYD_OU;
884 return out;
885 }
886
887 FL struct str *
b64_encode_buf(struct str * out,void const * vp,uz vp_len,enum b64flags flags)888 b64_encode_buf(struct str *out, void const *vp, uz vp_len,
889 enum b64flags flags){
890 struct str in;
891 NYD_IN;
892
893 in.s = n_UNCONST(vp);
894 in.l = vp_len;
895 out = b64_encode(out, &in, flags);
896 NYD_OU;
897 return out;
898 }
899
#ifdef notyet
/* b64_encode() for the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
   struct str src;
   struct str *rv;
   NYD_IN;

   src.l = su_cs_len(cp);
   src.s = n_UNCONST(cp);
   rv = b64_encode(out, &src, flags);

   NYD_OU;
   return rv;
}
#endif /* notyet */
913
914 FL boole
b64_decode(struct str * out,struct str const * in)915 b64_decode(struct str *out, struct str const *in){
916 struct str work;
917 uz len;
918 NYD_IN;
919
920 out->l = 0;
921
922 if((len = a_me_b64_decode_prepare(&work, in)) == UZ_MAX)
923 goto jerr;
924
925 /* Ignore an empty input, as may happen for an empty final line */
926 if(work.l == 0)
927 out->s = n_realloc(out->s, 1);
928 else if(work.l >= 4 && !(work.l & 3)){
929 out->s = n_realloc(out->s, len +1);
930 if((sz)(len = a_me_b64_decode(out, &work)) < 0)
931 goto jerr;
932 }else
933 goto jerr;
934 out->s[out->l] = '\0';
935 jleave:
936 NYD_OU;
937 return (out != NULL);
938 jerr:
939 out = NULL;
940 goto jleave;
941 }
942
943 FL boole
b64_decode_header(struct str * out,struct str const * in)944 b64_decode_header(struct str *out, struct str const *in){
945 struct str outr, inr;
946 NYD_IN;
947
948 if(!b64_decode(out, in)){
949 su_mem_set(&outr, 0, sizeof outr);
950 su_mem_set(&inr, 0, sizeof inr);
951
952 if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
953 out = NULL;
954
955 if(inr.s != NULL)
956 n_free(inr.s);
957 if(outr.s != NULL)
958 n_free(outr.s);
959 }
960 NYD_OU;
961 return (out != NULL);
962 }
963
/* Tolerantly decode one chunk of Base64 encoded data from in.
 * The result consists of the former content of out, followed by the content
 * of outrest (which is consumed: outrest->l is set to 0), followed by the
 * decoded data.
 * At first the strict a_me_b64_decode() is tried on the trimmed input; only
 * if that fails or does not consume all input the data is walked byte by
 * byte, skipping (and reporting via n_err()) invalid bytes.
 * If the input ends within a quadruple the collected bytes of that are
 * stored NUL terminated in inrest_or_null; it is an error if that is NULL.
 * Returns FAL0 on error; note that out->s has been set to NULL then */
FL boole
b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
      struct str *inrest_or_null){
   struct str work, save;
   u32 a, b, c, b64l;
   char ca, cb, cc, cx;
   struct n_string s, workbuf;
   uz len;
   NYD_IN;

   /* Collect the result in s: take over the buffer of out if that is NUL
    * terminated, otherwise copy the data and free the buffer */
   n_string_creat(&s);
   if((len = out->l) > 0 && out->s[len] == '\0')
      (void)n_string_take_ownership(&s, out->s, len +1, len);
   else{
      if(len > 0)
         n_string_push_buf(&s, out->s, len);
      if(out->s != NULL)
         n_free(out->s);
   }
   out->s = NULL, out->l = 0;
   /* NOTE(review): workbuf is only created here and gut at jleave, it is
    * not used otherwise in this function */
   n_string_creat(&workbuf);

   if((len = a_me_b64_decode_prepare(&work, in)) == UZ_MAX)
      goto jerr;

   if(outrest->l > 0){
      n_string_push_buf(&s, outrest->s, outrest->l);
      outrest->l = 0;
   }

   /* su_ERR_OVERFLOW */
   if(UZ_MAX - len <= s.s_len ||
         S32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
      goto jerr;

   if(work.l == 0)
      goto jok;

   /* This text decoder is extremely expensive, especially given that in all
    * but _invalid_ cases it is not even needed!  So try once to do the normal
    * decoding, if that fails, go the hard way */
   save = work;
   /* Let out temporarily alias the grown buffer of s, so that the decoder
    * appends directly behind the existing data; b64l remembers the former
    * length in order to be able to undo that */
   out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;

   if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
      n_string_trunc(&s, out->l);
      if(work.l == 0)
         goto jok;
   }

   /* Fast path failed or left input behind: undo it, restart on the input */
   n_string_trunc(&s, b64l);
   work = save;
   out->s = NULL, out->l = 0;

   /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
    * TODO OpenSSL and mutt simply bail on such stuff */
   UNINIT(ca, 0); UNINIT(a, 0);
   UNINIT(cb, 0); UNINIT(b, 0);
   UNINIT(cc, 0); UNINIT(c, 0);
   /* b64l is the position within the current quadruple from here on;
    * a,b,c are the decoded values, ca,cb,cc the input bytes of the first
    * three positions */
   for(b64l = 0;;){
      u32 x;

      x = a_ME_B64_DECUI8((u8)(cx = *work.s));
      switch(b64l){
      case 0:
         /* Neither padding nor invalid bytes are acceptable here */
         if(x >= a_ME_B64_EQU)
            goto jrepl;
         ca = cx;
         a = x;
         ++b64l;
         break;
      case 1:
         if(x >= a_ME_B64_EQU)
            goto jrepl;
         cb = cx;
         b = x;
         ++b64l;
         break;
      case 2:
         /* Padding is acceptable in the third and fourth position */
         if(x == a_ME_B64_BAD)
            goto jrepl;
         cc = cx;
         c = x;
         ++b64l;
         break;
      case 3:
         if(x == a_ME_B64_BAD){
jrepl:
            /* The invalid byte is skipped, b64l is left unchanged */
            /* TODO This would be wrong since iconv(3) may be applied first! */
            n_err(_("Invalid base64 encoding ignored\n"));
#if 0
            if(n_psonce & n_PSO_UNICODE)
               n_string_push_buf(&s, su_utf_8_replacer,
                  sizeof(su_utf_8_replacer) -1);
            else
               n_string_push_c(&s, '?');
#endif
            ;
         }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
            /* This is not only invalid but bogus.  Skip it over! */
            /* TODO This would be wrong since iconv(3) may be applied first! */
            n_err(_("Illegal base64 encoding ignored\n"));
#if 0
            n_string_push_buf(&s, su_UTF_8_REPLACER su_UTF_8_REPLACEMENT
               su_UTF_8_REPLACER su_UTF_8_REPLACEMENT,
               (sizeof(su_UTF_8_REPLACER) -1) * 4);
#endif
            b64l = 0;
         }else{
            /* A complete quadruple: store the bytes it decodes to, dropping
             * CR bytes if n_PS_BASE64_STRIP_CR is set */
            u8 pb;

            pb = ((a << 2) | ((b & 0x30) >> 4));
            if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
               n_string_push_c(&s, (char)pb);
            pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
            if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
               n_string_push_c(&s, (char)pb);
            if(x != a_ME_B64_EQU){
               pb = (((c & 0x03) << 6) | x);
               if(pb != (u8)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
                  n_string_push_c(&s, (char)pb);
            }
            ++b64l;
         }
         break;
      }

      ++work.s;
      if(--work.l == 0){
         /* End of input within a quadruple: hand the collected bytes back */
         if(b64l > 0 && b64l != 4){
            if(inrest_or_null == NULL)
               goto jerr;
            inrest_or_null->s = n_realloc(inrest_or_null->s, b64l +1);
            inrest_or_null->s[0] = ca;
            if(b64l > 1)
               inrest_or_null->s[1] = cb;
            if(b64l > 2)
               inrest_or_null->s[2] = cc;
            inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
         }
         goto jok;
      }
      if(b64l == 4)
         b64l = 0;
   }

jok:
   out->s = n_string_cp(&s);
   out->l = s.s_len;
   n_string_drop_ownership(&s);
jleave:
   n_string_gut(&workbuf);
   n_string_gut(&s);
   NYD_OU;
   return (out != NULL);
jerr:
   out = NULL;
   goto jleave;
}
1123
1124 #include "su/code-ou.h"
1125 /* s-it-mode */
1126