1 /* This file is part of Mailfromd.
2    Copyright (C) 2020-2021 Sergey Poznyakoff
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 #include <stdlib.h>
21 #include <string.h>
22 #include <mailutils/errno.h>
23 #include <mailutils/filter.h>
24 #include <mailutils/cctype.h>
25 
26 /*
 * This file defines a mailutils filter implementing the two DKIM
 * canonicalization algorithms: "simple" and "relaxed" (see
 * RFC 6376, 3.4.  "Canonicalization", page 13).
30  *
 * The filter reads a LF-delimited message stream from its input
 * and outputs a canonicalized message, with header and body parts
 * processed using the selected algorithm.
34  *
35  * For further processing the output stream must be passed through a
36  * CRLF encoder.
37  *
38  * The filter is governed by the following structure:
39  */
40 
/* Canonicalizer state, preserved between the calls to the filter. */
struct encoder_state {
	/*
	 * Canonicalization algorithms (the DKIM_CANON_ constants from
	 * dkim.h): canon[0] is used for headers, canon[1] for body.
	 */
	int canon[2];

	/* Current state of the state machine (see the list of states). */
	int state;

	/*
	 * Number of consecutive newlines read from the message body and
	 * not yet written to the output.  Used by both body algorithms
	 * to drop the empty lines at the end of the body.
	 */
	size_t nlcount;
};
50 
/*
 * Encoder states.
 *
 * The order of the constants is significant: all body states follow
 * ST_DELIM, and the canonicalizer tests for "state >= BS_INIT" to
 * find out whether it is processing the message body.
 */
enum {
	/* Header, "simple" algorithm */
	HS_INIT,        /* Copying header text */
	HS_NL,          /* Newline seen in the header */

	/* Header, "relaxed" algorithm */
	HR_NAME,        /* Reading header field name */
	HR_SPACE,       /* Whitespace between field name and colon */
	HR_COLON,       /* Colon seen, skipping whitespace after it */
	HR_VALUE,       /* Reading header field value */
	HR_WS,          /* Whitespace run within the field value */
	HR_NL,          /* Newline seen in the header */

	/* Empty line that separates the headers from the body */
	ST_DELIM,

	/* Body, "simple" algorithm */
	BS_INIT,        /* Copying body text */
	BS_NL,          /* Counting consecutive newlines */

	/* Body, "relaxed" algorithm */
	BR_INIT,        /* Copying body text */
	BR_WS,          /* Whitespace run within a line */
	BR_NL           /* Counting consecutive newlines */
};
73 
74 /* Initial states states */
75 static int init_state[][2] = {
76 	{ HS_INIT, HR_NAME },
77 	{ BS_INIT, BR_INIT }
78 };
79 
80 static enum mu_filter_result
dkim_canonicalizer(void * xd,enum mu_filter_command cmd,struct mu_filter_io * iobuf)81 dkim_canonicalizer(void *xd,
82 		   enum mu_filter_command cmd,
83 		   struct mu_filter_io *iobuf)
84 {
85 	struct encoder_state *encoder = xd;
86 	const char *iptr, *iendptr;
87 	char *optr, *oendptr;
88 
89 	switch (cmd) {
90 	case mu_filter_init:
91 		encoder->state = init_state[0][encoder->canon[0]];
92 	case mu_filter_done:
93 		return mu_filter_ok;
94 	default:
95 		break;
96 	}
97 
98 	iptr = iobuf->input;
99 	iendptr = iptr + iobuf->isize;
100 	optr = iobuf->output;
101 	oendptr = optr + iobuf->osize;
102 
103 	while (iptr < iendptr && optr < oendptr) {
104 		switch (encoder->state) {
105 			/* Header "simple" canonicalization */
106 		case HS_INIT:
107 			if (*iptr == '\n')
108 				encoder->state = HS_NL;
109 			else
110 				*optr++ = *iptr;
111 			iptr++;
112 			break;
113 
114 		case HS_NL:
115 			if (*iptr == '\n') {
116 				encoder->state = ST_DELIM;
117 			} else {
118 				*optr++ = '\n';
119 				encoder->state = HS_INIT;
120 			}
121 			break;
122 
123 			/* Header "relaxed" canonicalization */
124 		case HR_NAME:
125 			if (mu_isblank(*iptr)) {
126 				iptr++;
127 				encoder->state = HR_SPACE;
128 			} else if (*iptr == ':') {
129 				*optr++ = *iptr++;
130 				encoder->state = HR_COLON;
131 			} else if (mu_isheadr(*iptr) ||
132 				   /*
133 				    * Work around the bug in mailutils 3.9:
134 				    * the MU_CTYPE_HEADR class did not include
135 				    * underscore.
136 				    */
137 				   *iptr == '_') {
138 				*optr++ = mu_tolower(*iptr++);
139 			} else {
140 				iobuf->errcode = MU_ERR_USER0;
141 				return mu_filter_failure;
142 			}
143 			break;
144 
145 		case HR_SPACE:
146 			if (mu_isblank(*iptr))
147 				iptr++;
148 			else if (*iptr == ':') {
149 				*optr++ = *iptr++;
150 				encoder->state = HR_COLON;
151 			} else {
152 				iobuf->errcode = MU_ERR_USER0;
153 				return mu_filter_failure;
154 			}
155 			break;
156 
157 		case HR_COLON:
158 			if (mu_isblank(*iptr))
159 				iptr++;
160 			else
161 				encoder->state = HR_VALUE;
162 			break;
163 
164 		case HR_VALUE:
165 			if (mu_isblank(*iptr)) {
166 				iptr++;
167 				encoder->state = HR_WS;
168 			} else if (*iptr == '\n') {
169 				iptr++;
170 				encoder->state = HR_NL;
171 			} else
172 				*optr++ = *iptr++;
173 			break;
174 
175 		case HR_WS:
176 			if (mu_isblank(*iptr))
177 				iptr++;
178 			else if (*iptr == '\n') {
179 				iptr++;
180 				encoder->state = HR_NL;
181 			} else {
182 				*optr++ = ' ';
183 				encoder->state = HR_VALUE;
184 			}
185 			break;
186 
187 		case HR_NL:
188 			if (*iptr == '\n') {
189 				encoder->state = ST_DELIM;
190 			} else if (mu_isblank(*iptr)) {
191 				iptr++;
192 				encoder->state = HR_WS;
193 			} else {
194 				*optr++ = '\n';
195 				encoder->state = HR_NAME;
196 			}
197 			break;
198 
199 			/* Delimiter between header and body */
200 		case ST_DELIM:
201 			if (oendptr - optr < 2)
202 				goto end;
203 			iptr++;
204 			*optr++ = '\n';
205 			*optr++ = '\n';
206 			encoder->state = init_state[1][encoder->canon[1]];
207 			break;
208 
209 			/* Body "simple" canonicalization */
210 		case BS_INIT:
211 			if (*iptr == '\n') {
212 				iptr++;
213 				encoder->nlcount++;
214 				encoder->state = BS_NL;
215 			} else
216 				*optr++ = *iptr++;
217 			break;
218 
219 		case BS_NL:
220 			if (*iptr == '\n') {
221 				iptr++;
222 				encoder->nlcount++;
223 			} else {
224 				for (; encoder->nlcount; encoder->nlcount--) {
225 					if (optr == oendptr)
226 						goto end;
227 					*optr++ = '\n';
228 				}
229 				encoder->state = BS_INIT;
230 			}
231 			break;
232 
233 			/* Body "relaxed" canonicalization */
234 		case BR_INIT:
235 			if (*iptr == '\n') {
236 				encoder->nlcount++;
237 				encoder->state = BR_NL;
238 			} else if (mu_isblank (*iptr))
239 				encoder->state = BR_WS;
240 			else
241 				*optr++ = *iptr;
242 			iptr++;
243 			break;
244 
245 		case BR_WS:
246 			if (*iptr == '\n') {
247 				iptr++;
248 				encoder->nlcount++;
249 				encoder->state = BR_NL;
250 			} else if (!mu_isblank(*iptr)) {
251 				*optr++ = ' ';
252 				encoder->state = BR_INIT;
253 			} else {
254 				iptr++;
255 			}
256 			break;
257 
258 		case BR_NL:
259 			if (*iptr == '\n') {
260 				iptr++;
261 				encoder->nlcount++;
262 			} else {
263 				for (; encoder->nlcount; encoder->nlcount--) {
264 					if (optr == oendptr)
265 						goto end;
266 					*optr++ = '\n';
267 				}
268 				encoder->state = BR_INIT;
269 			}
270 			break;
271 		}
272 	}
273 
274 	if (cmd == mu_filter_lastbuf && iptr == iendptr &&
275 	    encoder->state >= BS_INIT) {
276 		if (oendptr == optr) {
277 			iobuf->osize++;
278 			return mu_filter_moreoutput;
279 		}
280 		*optr++ = '\n';
281 	}
282 
283 end:
284 	iobuf->isize = iptr - iobuf->input;
285 	iobuf->osize = optr - iobuf->output;
286 
287 	return mu_filter_ok;
288 }
289 
290 /*
291  * Create a message canonicalizer.  Arguments:
292  *
293  * pstream       return pointer
294  * stream        input stream
295  * canon_header  header canonicalization algorithm
296  * canon_body    body canonicalization algorithm
297  *
298  * Return value: mailutils error code.
299  */
300 int
dkim_canonicalizer_create(mu_stream_t * pstream,mu_stream_t stream,int canon_header,int canon_body,int flags)301 dkim_canonicalizer_create(mu_stream_t *pstream,
302 			  mu_stream_t stream,
303 			  int canon_header,
304 			  int canon_body,
305 			  int flags)
306 {
307 	struct encoder_state *encoder = malloc(sizeof (*encoder));
308 	if (!encoder)
309 		return ENOMEM;
310 	memset(encoder, 0, sizeof(*encoder));
311 	encoder->canon[0] = canon_header;
312 	encoder->canon[1] = canon_body;
313 	return mu_filter_stream_create(pstream, stream,
314 				       MU_FILTER_ENCODE,
315 				       dkim_canonicalizer, encoder, flags);
316 }
317 
318 #if 0
319 /*
320  * The canonicalizer can be registered as a regular mailutils filter.
321  * If such approach is ever needed, uncomment this block.
322  */
323 
324 static int
325 alloc_state(void **pret, int mode, int argc, const char **argv)
326 {
327 	struct encoder_state *encoder;
328 	switch (mode) {
329 	case MU_FILTER_ENCODE:
330 		encoder = malloc(sizeof(*encoder));
331 		if (!encoder)
332 			return ENOMEM;
333 		memset(encoder, 0, sizeof(*encoder));
334 		*pret = encoder;
335 		break;
336 
337 	case MU_FILTER_DECODE:;
338 	}
339 	return 0;
340 }
341 
/*
 * Filter record for the canonicalizer.  The initializer is positional;
 * NOTE(review): the members are presumably the filter name, the state
 * allocator, the encoder and the decoder, in that order -- confirm
 * against the definition of struct _mu_filter_record.
 */
static struct _mu_filter_record dkim_canonicalize_filter_s = {
	"DKIM",
	alloc_state,
	dkim_canonicalizer,
	NULL
};

/* Public handle to the filter record above. */
mu_filter_record_t dkim_canonicalize_filter = &dkim_canonicalize_filter_s;
350 #endif
351