1 /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "buffer.h"
5 #include "istream.h"
6 #include "str.h"
7 #include "strfuncs.h"
8 #include "unichar.h"
9 #include "message-size.h"
10 #include "message-header-parser.h"
11 
/* Longest header name (bytes before the ':', trailing LWSP excluded) that is
   still reported as a name. A longer name makes the parser return the whole
   line as a nameless value instead. See RFC 5322 2.1.1 and 2.2. */
#define MESSAGE_HEADER_NAME_MAX_LEN 1000
14 
/* Parser state kept between message_parse_header_next() calls. */
struct message_header_parser_ctx {
	/* the line handed to the caller; reused and overwritten by every
	   message_parse_header_next() call */
	struct message_header_line line;

	/* stream being parsed; referenced in init and unreferenced in deinit */
	struct istream *input;
	/* optional (may be NULL) caller-provided counters that are updated
	   as header data is consumed */
	struct message_size *hdr_size;

	/* current header name, followed by a NUL and then the "middle" bytes
	   between the name and the value */
	string_t *name;
	/* copy of the current header's value; continuation lines are appended
	   to it when the full value was requested */
	buffer_t *value_buf;

	enum message_header_parser_flags flags;
	/* when set, the next complete line is consumed without being returned */
	bool skip_line:1;
	/* set once a NUL byte has been seen while scanning header data */
	bool has_nuls:1;
};
28 
29 struct message_header_parser_ctx *
message_parse_header_init(struct istream * input,struct message_size * hdr_size,enum message_header_parser_flags flags)30 message_parse_header_init(struct istream *input, struct message_size *hdr_size,
31 			  enum message_header_parser_flags flags)
32 {
33 	struct message_header_parser_ctx *ctx;
34 
35 	ctx = i_new(struct message_header_parser_ctx, 1);
36 	ctx->input = input;
37 	ctx->hdr_size = hdr_size;
38 	ctx->name = str_new(default_pool, 128);
39 	ctx->flags = flags;
40 	ctx->value_buf = buffer_create_dynamic(default_pool, 4096);
41 	i_stream_ref(input);
42 
43 	if (hdr_size != NULL)
44 		i_zero(hdr_size);
45 	return ctx;
46 }
47 
message_parse_header_deinit(struct message_header_parser_ctx ** _ctx)48 void message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
49 {
50 	struct message_header_parser_ctx *ctx = *_ctx;
51 
52 	i_stream_unref(&ctx->input);
53 	buffer_free(&ctx->value_buf);
54 	str_free(&ctx->name);
55 	i_free(ctx);
56 
57 	*_ctx = NULL;
58 }
59 
/* Read the next header line from ctx->input and describe it in *hdr_r.

   Returns:
     1  - *hdr_r points to the parser's internal line struct. It describes
          a header line, a piece of a line that had to be split, or the
          end-of-headers marker (line->eoh set). The struct and the data it
          points to live in parser-owned buffers or in the stream's buffer
          and are reused by the next call.
     0  - the stream returned no data but is not at EOF; call again later.
    -1  - the end of headers was already returned earlier, or the stream
          ended / failed with nothing left to return.
   For 0 and -1 *hdr_r is set to NULL.

   When ctx->hdr_size is set, its lines / physical_size / virtual_size are
   updated for the consumed bytes; virtual_size gets one extra byte for
   every LF that was not preceded by CR.

   line->use_full_value is read from the previously returned line
   (presumably set there by the caller - verify against the header) and is
   always reset to FALSE before returning 1. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r)
{
        struct message_header_line *line = &ctx->line;
	const unsigned char *msg;
	size_t i, size, startpos, colon_pos, parse_size, skip = 0;
	int ret;
	bool continued, continues, last_no_newline, last_crlf;
	bool no_newline, crlf_newline;

	*hdr_r = NULL;
	/* the end-of-headers marker was already returned */
	if (line->eoh)
		return -1;

	if (line->continues)
		/* continuation of the previous line: there is no name to
		   find, and any non-UINT_MAX value disables the ':' search */
		colon_pos = 0;
	else {
		/* new header line */
		line->name_offset = ctx->input->v_offset;
		/* UINT_MAX = ':' not found (yet) */
		colon_pos = UINT_MAX;
		buffer_set_used_size(ctx->value_buf, 0);
	}

	no_newline = FALSE;
	crlf_newline = FALSE;
	/* line->continues still holds the previous call's result here */
	continued = line->continues;
	continues = FALSE;

	/* Each iteration reads more data and scans msg[startpos..parse_size)
	   for ':' and LF. The loop is left with `size` = number of bytes
	   from msg that belong to this line's content and `skip` = number of
	   bytes to consume from the stream. */
	for (startpos = 0;;) {
		ret = i_stream_read_bytes(ctx->input, &msg, &size, startpos+2);
		if (ret >= 0) {
			/* we want to know one byte in advance to find out
			   if it's multiline header */
			parse_size = size == 0 ? 0 : size-1;
		} else {
			/* no more data is coming into this buffer, so
			   everything available can be scanned */
			parse_size = size;
		}

		/* nothing new to scan since the previous iteration */
		if (ret <= 0 && startpos == parse_size) {
			if (ret == -1) {
				if (startpos > 0) {
					/* header ended unexpectedly. */
					no_newline = TRUE;
					skip = startpos;
					break;
				}
				/* error / EOF with no bytes */
				i_assert(skip == 0);
				return -1;
			}

			if (size > 0 && !ctx->skip_line && !continued &&
			    (msg[0] == '\n' ||
			     (msg[0] == '\r' && size > 1 && msg[1] == '\n'))) {
				/* end of headers - this mostly happens just
				   with mbox where headers are read separately
				   from body */
				size = 0;
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->lines++;
				if (msg[0] == '\r') {
					skip = 2;
					crlf_newline = TRUE;
				} else {
					skip = 1;
					/* bare LF: count the missing CR */
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}
				break;
			}
			if (ret == 0 && !ctx->input->eof) {
				/* stream is nonblocking - need more data */
				i_assert(skip == 0);
				return 0;
			}
			i_assert(size > 0);

			/* a) line is larger than input buffer
			   b) header ended unexpectedly */
			/* NOTE(review): -2 appears to be the "input buffer is
			   full" result of i_stream_read_bytes() - confirm */
			if (ret == -2) {
				/* go back to last LWSP if found. */
				size_t min_pos = !continued ? colon_pos : 0;
				for (i = size-1; i > min_pos; i--) {
					if (IS_LWSP(msg[i])) {
						size = i;
						break;
					}
				}
				if (i == min_pos && (msg[size-1] == '\r' ||
						     msg[size-1] == '\n')) {
					/* we may or may not have a full header,
					   but we don't know until we get the
					   next character. leave out the
					   linefeed and finish the header on
					   the next run. */
					size--;
					if (size > 0 && msg[size-1] == '\r')
						size--;
				}
				/* the buffer really has to be more than 2 to
				   avoid CRLF looping forever */
				i_assert(size > 0);

				/* the rest of this line is returned by the
				   next call as a continued piece */
				continues = TRUE;
			}
			no_newline = TRUE;
			skip = size;
			break;
		}

		/* find ':' */
		if (colon_pos == UINT_MAX) {
			for (i = startpos; i < parse_size; i++) {
				/* quick reject: ':', '\n' and '\0' are all
				   <= ':' */
				if (msg[i] > ':')
					continue;

				if (msg[i] == ':' && !ctx->skip_line) {
					colon_pos = i;
					line->full_value_offset =
						ctx->input->v_offset + i + 1;
					break;
				}
				if (msg[i] == '\n') {
					/* end of headers, or error */
					break;
				}

				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		} else {
			i = startpos;
		}

		/* find '\n' */
		for (; i < parse_size; i++) {
			if (msg[i] <= '\n') {
				if (msg[i] == '\n')
					break;
				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		}

		if (i < parse_size && i+1 == size && ret == -2) {
			/* we don't know if the line continues. */
			i++;
		} else if (i < parse_size) {
			/* got a line */
			/* NOTE(review): nothing in the visible code sets
			   skip_line to TRUE, so this branch looks
			   unreachable - confirm before relying on it */
			if (ctx->skip_line) {
				/* skipping a line with a huge header name */
				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->lines++;
					ctx->hdr_size->physical_size += i + 1;
					ctx->hdr_size->virtual_size += i + 1;
				}
				if (i == 0 || msg[i-1] != '\r') {
					/* missing CR */
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}

				i_stream_skip(ctx->input, i + 1);
				startpos = 0;
				ctx->skip_line = FALSE;
				continue;
			}
			/* the byte after LF decides whether the header
			   is folded onto the next line */
			continues = i+1 < size && IS_LWSP(msg[i+1]);

			if (ctx->hdr_size != NULL)
				ctx->hdr_size->lines++;
			if (i == 0 || msg[i-1] != '\r') {
				/* missing CR */
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->virtual_size++;
				size = i;
			} else {
				/* leave the CR out of the line content */
				size = i-1;
				crlf_newline = TRUE;
			}

			skip = i+1;
			break;
		}

		/* no LF yet: read more and resume scanning where we stopped */
		startpos = i;
	}

	/* remember how the previous line ended before line->* is overwritten
	   below; needed when joining the pieces into the full value */
	last_crlf = line->crlf_newline &&
		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
	last_no_newline = line->no_newline ||
		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;

	line->continues = continues;
	line->continued = continued;
	line->crlf_newline = crlf_newline;
	line->no_newline = no_newline;
	if (size == 0 && !continued) {
		/* end of headers */
		line->eoh = TRUE;
		line->name_len = line->value_len = line->full_value_len = 0;
		line->name = ""; line->value = line->full_value = NULL;
		line->middle = NULL; line->middle_len = 0;
		line->full_value_offset = line->name_offset;
		line->continues = FALSE;
	} else if (line->continued) {
		/* continuation piece: name/middle from the first piece are
		   left as they were, the whole piece is value */
		line->value = msg;
		line->value_len = size;
	} else if (colon_pos == UINT_MAX) {
		/* missing ':', assume the whole line is value */
		line->value = msg;
		line->value_len = size;
		line->full_value_offset = line->name_offset;

		line->name = "";
		line->name_len = 0;

		line->middle = uchar_empty_ptr;
		line->middle_len = 0;
	} else {
		size_t pos;

		line->value = msg + colon_pos+1;
		line->value_len = size - colon_pos - 1;
		if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) != 0) {
			/* get value. skip all LWSP after ':'. Note that
			   RFC2822 doesn't say we should, but history behind
			   it..

			   Exception to this is if the value consists only of
			   LWSP, then skip only the one LWSP after ':'. */
			for (pos = 0; pos < line->value_len; pos++) {
				if (!IS_LWSP(line->value[pos]))
					break;
			}

			if (pos == line->value_len) {
				/* everything was LWSP */
				if (line->value_len > 0 &&
				    IS_LWSP(line->value[0]))
					pos = 1;
			}
		} else {
			/* skip at most one LWSP after ':' */
			pos = line->value_len > 0 &&
				IS_LWSP(line->value[0]) ? 1 : 0;
		}

		line->value += pos;
		line->value_len -= pos;
		line->full_value_offset += pos;

		/* get name, skip LWSP before ':' */
		while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
			colon_pos--;

		/* Treat overlong header names as if the full header line was
		   a value. Callers can usually handle large values better than
		   large names. */
		if (colon_pos > MESSAGE_HEADER_NAME_MAX_LEN) {
			line->name = "";
			line->name_len = 0;
			line->middle = uchar_empty_ptr;
			line->middle_len = 0;
			line->value = msg;
			line->value_len = size;
			line->full_value_offset = line->name_offset;
		} else {
			str_truncate(ctx->name, 0);
			/* use buffer_append() so the name won't be truncated if there
			   are NULs. */
			buffer_append(ctx->name, msg, colon_pos);
			str_append_c(ctx->name, '\0');

			/* keep middle stored also in ctx->name so it's available
			   with use_full_value */
			line->middle = msg + colon_pos;
			line->middle_len = (size_t)(line->value - line->middle);
			str_append_data(ctx->name, line->middle, line->middle_len);

			line->name = str_c(ctx->name);
			line->name_len = colon_pos;
			/* repoint middle at the copy stored after the
			   name's NUL inside ctx->name */
			line->middle = str_data(ctx->name) + line->name_len + 1;
		}
	}

	if (!line->continued) {
		/* first header line. make a copy of the line since we can't
		   really trust input stream not to lose it. */
		buffer_append(ctx->value_buf, line->value, line->value_len);
		line->value = line->full_value = ctx->value_buf->data;
		line->full_value_len = line->value_len;
	} else if (line->use_full_value) {
		/* continue saving the full value. */
		if (last_no_newline) {
			/* line is longer than fit into our buffer, so we
			   were forced to break it into multiple
			   message_header_lines */
		} else {
			/* put back the newline that ended the previous piece */
			if (last_crlf)
				buffer_append_c(ctx->value_buf, '\r');
			buffer_append_c(ctx->value_buf, '\n');
		}
		if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0 &&
		    line->value_len > 0 && line->value[0] != ' ' &&
		    IS_LWSP(line->value[0])) {
			/* replace a leading non-space LWSP with a space */
			buffer_append_c(ctx->value_buf, ' ');
			buffer_append(ctx->value_buf,
				      line->value + 1, line->value_len - 1);
		} else {
			buffer_append(ctx->value_buf,
				      line->value, line->value_len);
		}
		line->full_value = ctx->value_buf->data;
		line->full_value_len = ctx->value_buf->used;
	} else {
		/* we didn't want full_value, and this is a continued line. */
		line->full_value = NULL;
		line->full_value_len = 0;
	}

	/* always reset it */
	line->use_full_value = FALSE;

	/* account for and consume everything belonging to this line,
	   including its newline */
	if (ctx->hdr_size != NULL) {
		ctx->hdr_size->physical_size += skip;
		ctx->hdr_size->virtual_size += skip;
	}
	i_stream_skip(ctx->input, skip);

	*hdr_r = line;
	return 1;
}
392 
message_parse_header_has_nuls(const struct message_header_parser_ctx * ctx)393 bool message_parse_header_has_nuls(const struct message_header_parser_ctx *ctx)
394 {
395 	return ctx->has_nuls;
396 }
397 
398 #undef message_parse_header
/* Parse all headers from input in one go, calling callback(hdr, context)
   for every line the parser returns, and finally callback(NULL, context)
   once the parser has been released.

   hdr_size and flags are passed to message_parse_header_init() unchanged.
   The parser must never report "need more data" here (asserted), so this
   is not usable with a stream that can run out of data before EOF. */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  enum message_header_parser_flags flags,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *parser =
		message_parse_header_init(input, hdr_size, flags);
	struct message_header_line *line;
	int ret;

	for (;;) {
		ret = message_parse_header_next(parser, &line);
		if (ret <= 0)
			break;
		callback(line, context);
	}
	/* 0 would mean the stream wanted us to wait for more input */
	i_assert(ret != 0);
	message_parse_header_deinit(&parser);

	/* call after the final skipping */
	callback(NULL, context);
}
416 
/* Append the header line (or continuation piece) described by hdr to output.

   For a non-continued line the name and the "middle" bytes are written
   before the value; a continued piece contributes only its value. The line
   terminator is CRLF or LF according to hdr->crlf_newline, and it is left
   out entirely when hdr->no_newline is set. */
void message_header_line_write(buffer_t *output,
			       const struct message_header_line *hdr)
{
	if (!hdr->continued) {
		/* NOTE(review): strlen() stops at an embedded NUL although
		   the parser also tracks name_len - confirm this is
		   intended */
		size_t name_len = strlen(hdr->name);

		buffer_append(output, hdr->name, name_len);
		buffer_append(output, hdr->middle, hdr->middle_len);
	}
	buffer_append(output, hdr->value, hdr->value_len);

	if (hdr->no_newline)
		return;
	if (hdr->crlf_newline)
		buffer_append(output, "\r\n", 2);
	else
		buffer_append_c(output, '\n');
}
431 
432 const char *
message_header_strdup(pool_t pool,const unsigned char * data,size_t size)433 message_header_strdup(pool_t pool, const unsigned char *data, size_t size)
434 {
435 	if (memchr(data, '\0', size) == NULL) {
436 		/* fast path */
437 		char *dest = p_malloc(pool, size+1);
438 		memcpy(dest, data, size);
439 		return dest;
440 	}
441 
442 	/* slow path - this could be made faster, but it should be
443 	   rare so keep it simple */
444 	string_t *str = str_new(pool, size+2);
445 	for (size_t i = 0; i < size; i++) {
446 		if (data[i] != '\0')
447 			str_append_c(str, data[i]);
448 		else
449 			str_append(str, UNICODE_REPLACEMENT_CHAR_UTF8);
450 	}
451 	return str_c(str);
452 }
453 
/* Check that every byte of the NUL-terminated name is allowed in a header
   field name. Returns true if so, false at the first disallowed byte.

   NOTE(review): an empty name is accepted even though the grammar below
   requires at least one character - confirm callers want that. */
bool message_header_name_is_valid(const char *name)
{
	/*
	  field-name      =   1*ftext

	  ftext           =   %d33-57 /          ; Printable US-ASCII
			      %d59-126           ;  characters not including
						 ;  ":".
	*/
	const unsigned char *p = (const unsigned char *)name;

	/* stop at the first byte that is outside ftext: below '!' (33),
	   above '~' (126), or the ':' (58) that sits between the two ranges */
	while (*p != '\0') {
		if (*p < 33 || *p > 126 || *p == ':')
			break;
		p++;
	}
	/* valid only if the scan reached the terminating NUL */
	return *p == '\0';
}
475