1 /* This file is part of Mailfromd.
2 Copyright (C) 2020-2021 Sergey Poznyakoff
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 #include <stdlib.h>
21 #include <string.h>
22 #include <mailutils/errno.h>
23 #include <mailutils/filter.h>
24 #include <mailutils/cctype.h>
25
/*
 * This file defines a mailutils filter implementing the two DKIM
 * canonicalization algorithms: "simple" and "relaxed" (see
 * RFC 6376, section 3.4, "Canonicalization", page 13).
 *
 * The filter reads a LF-delimited message stream from its input
 * and outputs a canonicalized message, with header and body parts
 * processed using the selected algorithms.
 *
 * For further processing the output stream must be passed through a
 * CRLF encoder.
 *
 * The filter is governed by the following structure:
 */
40
struct encoder_state {
	/*
	 * Canonicalization algorithm selectors: canon[0] applies to the
	 * header, canon[1] to the body.  See the DKIM_CANON_ constants
	 * in dkim.h.
	 */
	int canon[2];

	/* Current state of the encoder automaton. */
	int state;

	/*
	 * Number of consecutive newlines read in the body but not yet
	 * written to the output.
	 */
	size_t nlcount;
};
50
/*
 * Encoder states.
 *
 * The order matters: all header states and the delimiter state precede
 * BS_INIT, so that `state >= BS_INIT' means "processing the body".
 */
enum {
	/* Header, "simple" algorithm */
	HS_INIT = 0,	/* copying header text */
	HS_NL,		/* newline read, not yet written */

	/* Header, "relaxed" algorithm */
	HR_NAME,	/* reading the field name */
	HR_SPACE,	/* blanks between the field name and the colon */
	HR_COLON,	/* colon written; skipping blanks after it */
	HR_VALUE,	/* copying the field value */
	HR_WS,		/* run of blanks within the value, not yet written */
	HR_NL,		/* newline read, not yet written */

	/* Empty line separating the header from the body */
	ST_DELIM,

	/* Body, "simple" algorithm */
	BS_INIT,	/* copying body text */
	BS_NL,		/* counting consecutive newlines */

	/* Body, "relaxed" algorithm */
	BR_INIT,	/* copying body text */
	BR_WS,		/* run of blanks, not yet written */
	BR_NL		/* counting consecutive newlines */
};
73
74 /* Initial states states */
75 static int init_state[][2] = {
76 { HS_INIT, HR_NAME },
77 { BS_INIT, BR_INIT }
78 };
79
80 static enum mu_filter_result
dkim_canonicalizer(void * xd,enum mu_filter_command cmd,struct mu_filter_io * iobuf)81 dkim_canonicalizer(void *xd,
82 enum mu_filter_command cmd,
83 struct mu_filter_io *iobuf)
84 {
85 struct encoder_state *encoder = xd;
86 const char *iptr, *iendptr;
87 char *optr, *oendptr;
88
89 switch (cmd) {
90 case mu_filter_init:
91 encoder->state = init_state[0][encoder->canon[0]];
92 case mu_filter_done:
93 return mu_filter_ok;
94 default:
95 break;
96 }
97
98 iptr = iobuf->input;
99 iendptr = iptr + iobuf->isize;
100 optr = iobuf->output;
101 oendptr = optr + iobuf->osize;
102
103 while (iptr < iendptr && optr < oendptr) {
104 switch (encoder->state) {
105 /* Header "simple" canonicalization */
106 case HS_INIT:
107 if (*iptr == '\n')
108 encoder->state = HS_NL;
109 else
110 *optr++ = *iptr;
111 iptr++;
112 break;
113
114 case HS_NL:
115 if (*iptr == '\n') {
116 encoder->state = ST_DELIM;
117 } else {
118 *optr++ = '\n';
119 encoder->state = HS_INIT;
120 }
121 break;
122
123 /* Header "relaxed" canonicalization */
124 case HR_NAME:
125 if (mu_isblank(*iptr)) {
126 iptr++;
127 encoder->state = HR_SPACE;
128 } else if (*iptr == ':') {
129 *optr++ = *iptr++;
130 encoder->state = HR_COLON;
131 } else if (mu_isheadr(*iptr) ||
132 /*
133 * Work around the bug in mailutils 3.9:
134 * the MU_CTYPE_HEADR class did not include
135 * underscore.
136 */
137 *iptr == '_') {
138 *optr++ = mu_tolower(*iptr++);
139 } else {
140 iobuf->errcode = MU_ERR_USER0;
141 return mu_filter_failure;
142 }
143 break;
144
145 case HR_SPACE:
146 if (mu_isblank(*iptr))
147 iptr++;
148 else if (*iptr == ':') {
149 *optr++ = *iptr++;
150 encoder->state = HR_COLON;
151 } else {
152 iobuf->errcode = MU_ERR_USER0;
153 return mu_filter_failure;
154 }
155 break;
156
157 case HR_COLON:
158 if (mu_isblank(*iptr))
159 iptr++;
160 else
161 encoder->state = HR_VALUE;
162 break;
163
164 case HR_VALUE:
165 if (mu_isblank(*iptr)) {
166 iptr++;
167 encoder->state = HR_WS;
168 } else if (*iptr == '\n') {
169 iptr++;
170 encoder->state = HR_NL;
171 } else
172 *optr++ = *iptr++;
173 break;
174
175 case HR_WS:
176 if (mu_isblank(*iptr))
177 iptr++;
178 else if (*iptr == '\n') {
179 iptr++;
180 encoder->state = HR_NL;
181 } else {
182 *optr++ = ' ';
183 encoder->state = HR_VALUE;
184 }
185 break;
186
187 case HR_NL:
188 if (*iptr == '\n') {
189 encoder->state = ST_DELIM;
190 } else if (mu_isblank(*iptr)) {
191 iptr++;
192 encoder->state = HR_WS;
193 } else {
194 *optr++ = '\n';
195 encoder->state = HR_NAME;
196 }
197 break;
198
199 /* Delimiter between header and body */
200 case ST_DELIM:
201 if (oendptr - optr < 2)
202 goto end;
203 iptr++;
204 *optr++ = '\n';
205 *optr++ = '\n';
206 encoder->state = init_state[1][encoder->canon[1]];
207 break;
208
209 /* Body "simple" canonicalization */
210 case BS_INIT:
211 if (*iptr == '\n') {
212 iptr++;
213 encoder->nlcount++;
214 encoder->state = BS_NL;
215 } else
216 *optr++ = *iptr++;
217 break;
218
219 case BS_NL:
220 if (*iptr == '\n') {
221 iptr++;
222 encoder->nlcount++;
223 } else {
224 for (; encoder->nlcount; encoder->nlcount--) {
225 if (optr == oendptr)
226 goto end;
227 *optr++ = '\n';
228 }
229 encoder->state = BS_INIT;
230 }
231 break;
232
233 /* Body "relaxed" canonicalization */
234 case BR_INIT:
235 if (*iptr == '\n') {
236 encoder->nlcount++;
237 encoder->state = BR_NL;
238 } else if (mu_isblank (*iptr))
239 encoder->state = BR_WS;
240 else
241 *optr++ = *iptr;
242 iptr++;
243 break;
244
245 case BR_WS:
246 if (*iptr == '\n') {
247 iptr++;
248 encoder->nlcount++;
249 encoder->state = BR_NL;
250 } else if (!mu_isblank(*iptr)) {
251 *optr++ = ' ';
252 encoder->state = BR_INIT;
253 } else {
254 iptr++;
255 }
256 break;
257
258 case BR_NL:
259 if (*iptr == '\n') {
260 iptr++;
261 encoder->nlcount++;
262 } else {
263 for (; encoder->nlcount; encoder->nlcount--) {
264 if (optr == oendptr)
265 goto end;
266 *optr++ = '\n';
267 }
268 encoder->state = BR_INIT;
269 }
270 break;
271 }
272 }
273
274 if (cmd == mu_filter_lastbuf && iptr == iendptr &&
275 encoder->state >= BS_INIT) {
276 if (oendptr == optr) {
277 iobuf->osize++;
278 return mu_filter_moreoutput;
279 }
280 *optr++ = '\n';
281 }
282
283 end:
284 iobuf->isize = iptr - iobuf->input;
285 iobuf->osize = optr - iobuf->output;
286
287 return mu_filter_ok;
288 }
289
290 /*
291 * Create a message canonicalizer. Arguments:
292 *
293 * pstream return pointer
294 * stream input stream
295 * canon_header header canonicalization algorithm
296 * canon_body body canonicalization algorithm
297 *
298 * Return value: mailutils error code.
299 */
300 int
dkim_canonicalizer_create(mu_stream_t * pstream,mu_stream_t stream,int canon_header,int canon_body,int flags)301 dkim_canonicalizer_create(mu_stream_t *pstream,
302 mu_stream_t stream,
303 int canon_header,
304 int canon_body,
305 int flags)
306 {
307 struct encoder_state *encoder = malloc(sizeof (*encoder));
308 if (!encoder)
309 return ENOMEM;
310 memset(encoder, 0, sizeof(*encoder));
311 encoder->canon[0] = canon_header;
312 encoder->canon[1] = canon_body;
313 return mu_filter_stream_create(pstream, stream,
314 MU_FILTER_ENCODE,
315 dkim_canonicalizer, encoder, flags);
316 }
317
318 #if 0
319 /*
320 * The canonicalizer can be registered as a regular mailutils filter.
321 * If such approach is ever needed, uncomment this block.
322 */
323
324 static int
325 alloc_state(void **pret, int mode, int argc, const char **argv)
326 {
327 struct encoder_state *encoder;
328 switch (mode) {
329 case MU_FILTER_ENCODE:
330 encoder = malloc(sizeof(*encoder));
331 if (!encoder)
332 return ENOMEM;
333 memset(encoder, 0, sizeof(*encoder));
334 *pret = encoder;
335 break;
336
337 case MU_FILTER_DECODE:;
338 }
339 return 0;
340 }
341
342 static struct _mu_filter_record dkim_canonicalize_filter_s = {
343 "DKIM",
344 alloc_state,
345 dkim_canonicalizer,
346 NULL
347 };
348
349 mu_filter_record_t dkim_canonicalize_filter = &dkim_canonicalize_filter_s;
350 #endif
351