1 /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "buffer.h"
5 #include "istream.h"
6 #include "str.h"
7 #include "strfuncs.h"
8 #include "unichar.h"
9 #include "message-size.h"
10 #include "message-header-parser.h"
11
/* Longest header name (in bytes, after stripping LWSP before the ':') that
   message_parse_header_next() still returns as a name. Lines whose name
   part is longer than this are returned with an empty name and the whole
   line as the value. See RFC 5322 2.1.1 and 2.2. */
#define MESSAGE_HEADER_NAME_MAX_LEN 1000
14
/* Parser state kept between message_parse_header_next() calls. */
struct message_header_parser_ctx {
	/* The line handed out through hdr_r. The same object is reused and
	   updated on every call. */
	struct message_header_line line;

	/* Stream the headers are read from. Referenced in init,
	   unreferenced in deinit. */
	struct istream *input;
	/* Optional size/line counters updated while parsing. May be NULL. */
	struct message_size *hdr_size;

	/* Header name, then a NUL byte, then the "middle" bytes (everything
	   between the name and the start of the value). */
	string_t *name;
	/* Private copy of the first line's value. Continuation lines are
	   appended to it when the caller asked for the full value. */
	buffer_t *value_buf;

	/* Flags given to message_parse_header_init(). */
	enum message_header_parser_flags flags;
	/* When set, the rest of the current line is consumed without being
	   returned. NOTE(review): nothing in the visible code sets this to
	   TRUE - confirm whether it's still needed. */
	bool skip_line:1;
	/* Set once a NUL byte has been seen while scanning header data. */
	bool has_nuls:1;
};
28
29 struct message_header_parser_ctx *
message_parse_header_init(struct istream * input,struct message_size * hdr_size,enum message_header_parser_flags flags)30 message_parse_header_init(struct istream *input, struct message_size *hdr_size,
31 enum message_header_parser_flags flags)
32 {
33 struct message_header_parser_ctx *ctx;
34
35 ctx = i_new(struct message_header_parser_ctx, 1);
36 ctx->input = input;
37 ctx->hdr_size = hdr_size;
38 ctx->name = str_new(default_pool, 128);
39 ctx->flags = flags;
40 ctx->value_buf = buffer_create_dynamic(default_pool, 4096);
41 i_stream_ref(input);
42
43 if (hdr_size != NULL)
44 i_zero(hdr_size);
45 return ctx;
46 }
47
message_parse_header_deinit(struct message_header_parser_ctx ** _ctx)48 void message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
49 {
50 struct message_header_parser_ctx *ctx = *_ctx;
51
52 i_stream_unref(&ctx->input);
53 buffer_free(&ctx->value_buf);
54 str_free(&ctx->name);
55 i_free(ctx);
56
57 *_ctx = NULL;
58 }
59
/* Parse the next header line from ctx->input.

   On return *hdr_r is either NULL or points to ctx->line, which is reused
   and overwritten on every call.

   Return values:
     1 - a line was parsed and *hdr_r is set. This may be a complete header
         line, a continuation line (hdr->continued), a partial line that
         didn't fit into the stream buffer (hdr->no_newline together with
         hdr->continues), or the end-of-headers marker (hdr->eoh).
     0 - more data is needed and the stream isn't at EOF; nothing was
         consumed. *hdr_r is NULL.
    -1 - end of headers was already returned by an earlier call, or the
         stream read returned -1 with no pending bytes. *hdr_r is NULL.

   For the first line of a header the value is copied into ctx->value_buf.
   For continued lines hdr->value points directly at the stream's buffer,
   and full_value is only maintained if the caller set hdr->use_full_value
   before calling again. use_full_value is reset on every call.

   When ctx->hdr_size is set, its lines/physical_size/virtual_size are
   updated for the consumed bytes. virtual_size gets one extra byte for
   each LF that wasn't preceded by CR. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r)
{
	struct message_header_line *line = &ctx->line;
	const unsigned char *msg;
	size_t i, size, startpos, colon_pos, parse_size, skip = 0;
	int ret;
	bool continued, continues, last_no_newline, last_crlf;
	bool no_newline, crlf_newline;

	*hdr_r = NULL;
	if (line->eoh)
		return -1;

	/* colon_pos == UINT_MAX means "':' not found yet". A continuation
	   of the previous header has no name to look for, so the search is
	   disabled by setting it to 0. */
	if (line->continues)
		colon_pos = 0;
	else {
		/* new header line */
		line->name_offset = ctx->input->v_offset;
		colon_pos = UINT_MAX;
		buffer_set_used_size(ctx->value_buf, 0);
	}

	no_newline = FALSE;
	crlf_newline = FALSE;
	/* the previous call's "continues" becomes this line's "continued" */
	continued = line->continues;
	continues = FALSE;

	/* startpos is how far into msg[] we've already scanned. Each round
	   asks the stream for at least two more bytes than that. */
	for (startpos = 0;;) {
		ret = i_stream_read_bytes(ctx->input, &msg, &size, startpos+2);
		if (ret >= 0) {
			/* we want to know one byte in advance to find out
			   if it's multiline header */
			parse_size = size == 0 ? 0 : size-1;
		} else {
			parse_size = size;
		}

		/* No new data arrived and everything parseable has already
		   been scanned: decide whether to fail, wait, or return
		   what we have. */
		if (ret <= 0 && startpos == parse_size) {
			if (ret == -1) {
				if (startpos > 0) {
					/* header ended unexpectedly. */
					no_newline = TRUE;
					skip = startpos;
					break;
				}
				/* error / EOF with no bytes */
				i_assert(skip == 0);
				return -1;
			}

			if (size > 0 && !ctx->skip_line && !continued &&
			    (msg[0] == '\n' ||
			     (msg[0] == '\r' && size > 1 && msg[1] == '\n'))) {
				/* end of headers - this mostly happens just
				   with mbox where headers are read separately
				   from body */
				size = 0;
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->lines++;
				if (msg[0] == '\r') {
					skip = 2;
					crlf_newline = TRUE;
				} else {
					/* bare LF: count the missing CR in
					   the virtual size */
					skip = 1;
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}
				break;
			}
			if (ret == 0 && !ctx->input->eof) {
				/* stream is nonblocking - need more data */
				i_assert(skip == 0);
				return 0;
			}
			i_assert(size > 0);

			/* a) line is larger than input buffer
			   b) header ended unexpectedly */
			if (ret == -2) {
				/* go back to last LWSP if found. */
				/* On the first line don't go back past the
				   colon, so the name isn't split. */
				size_t min_pos = !continued ? colon_pos : 0;
				for (i = size-1; i > min_pos; i--) {
					if (IS_LWSP(msg[i])) {
						size = i;
						break;
					}
				}
				if (i == min_pos && (msg[size-1] == '\r' ||
						     msg[size-1] == '\n')) {
					/* we may or may not have a full header,
					   but we don't know until we get the
					   next character. leave out the
					   linefeed and finish the header on
					   the next run. */
					size--;
					if (size > 0 && msg[size-1] == '\r')
						size--;
				}
				/* the buffer really has to be more than 2 to
				   avoid CRLF looping forever */
				i_assert(size > 0);

				/* the rest of this line is returned by the
				   following call(s) */
				continues = TRUE;
			}
			no_newline = TRUE;
			skip = size;
			break;
		}

		/* find ':' */
		if (colon_pos == UINT_MAX) {
			for (i = startpos; i < parse_size; i++) {
				/* quick skip: ':' , LF and NUL are all
				   <= ':' */
				if (msg[i] > ':')
					continue;

				if (msg[i] == ':' && !ctx->skip_line) {
					colon_pos = i;
					line->full_value_offset =
						ctx->input->v_offset + i + 1;
					break;
				}
				if (msg[i] == '\n') {
					/* end of headers, or error */
					break;
				}

				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		} else {
			i = startpos;
		}

		/* find '\n' */
		for (; i < parse_size; i++) {
			/* quick skip: only LF and NUL are interesting and
			   both are <= '\n' */
			if (msg[i] <= '\n') {
				if (msg[i] == '\n')
					break;
				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		}

		if (i < parse_size && i+1 == size && ret == -2) {
			/* we don't know if the line continues. */
			/* The LF is the last byte of a full buffer. Step
			   past it so startpos == parse_size on the next
			   round, which is then handled by the ret == -2
			   branch above. */
			i++;
		} else if (i < parse_size) {
			/* got a line */
			if (ctx->skip_line) {
				/* skipping a line with a huge header name */
				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->lines++;
					ctx->hdr_size->physical_size += i + 1;
					ctx->hdr_size->virtual_size += i + 1;
				}
				if (i == 0 || msg[i-1] != '\r') {
					/* missing CR */
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}

				/* drop the skipped line and start over with
				   the following one */
				i_stream_skip(ctx->input, i + 1);
				startpos = 0;
				ctx->skip_line = FALSE;
				continue;
			}
			/* the byte after LF tells whether the next line is
			   a continuation of this header */
			continues = i+1 < size && IS_LWSP(msg[i+1]);

			if (ctx->hdr_size != NULL)
				ctx->hdr_size->lines++;
			/* size becomes the line length without the newline */
			if (i == 0 || msg[i-1] != '\r') {
				/* missing CR */
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->virtual_size++;
				size = i;
			} else {
				size = i-1;
				crlf_newline = TRUE;
			}

			skip = i+1;
			break;
		}

		/* no newline found yet - read more and continue scanning
		   from where we stopped */
		startpos = i;
	}

	/* Remember how the previously returned line ended before the line
	   state is overwritten. This decides below whether a newline is
	   inserted into the full value before this line's data. */
	last_crlf = line->crlf_newline &&
		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
	last_no_newline = line->no_newline ||
		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;

	line->continues = continues;
	line->continued = continued;
	line->crlf_newline = crlf_newline;
	line->no_newline = no_newline;
	if (size == 0 && !continued) {
		/* end of headers */
		line->eoh = TRUE;
		line->name_len = line->value_len = line->full_value_len = 0;
		line->name = ""; line->value = line->full_value = NULL;
		line->middle = NULL; line->middle_len = 0;
		line->full_value_offset = line->name_offset;
		line->continues = FALSE;
	} else if (line->continued) {
		/* continuation: name and middle stay as they were set for
		   the first line, only the value changes */
		line->value = msg;
		line->value_len = size;
	} else if (colon_pos == UINT_MAX) {
		/* missing ':', assume the whole line is value */
		line->value = msg;
		line->value_len = size;
		line->full_value_offset = line->name_offset;

		line->name = "";
		line->name_len = 0;

		line->middle = uchar_empty_ptr;
		line->middle_len = 0;
	} else {
		size_t pos;

		/* value starts right after the ':' for now */
		line->value = msg + colon_pos+1;
		line->value_len = size - colon_pos - 1;
		if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) != 0) {
			/* get value. skip all LWSP after ':'. Note that
			   RFC2822 doesn't say we should, but history behind
			   it..

			   Exception to this is if the value consists only of
			   LWSP, then skip only the one LWSP after ':'. */
			for (pos = 0; pos < line->value_len; pos++) {
				if (!IS_LWSP(line->value[pos]))
					break;
			}

			if (pos == line->value_len) {
				/* everything was LWSP */
				if (line->value_len > 0 &&
				    IS_LWSP(line->value[0]))
					pos = 1;
			}
		} else {
			/* skip at most one LWSP after ':' */
			pos = line->value_len > 0 &&
				IS_LWSP(line->value[0]) ? 1 : 0;
		}

		line->value += pos;
		line->value_len -= pos;
		line->full_value_offset += pos;

		/* get name, skip LWSP before ':' */
		while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
			colon_pos--;

		/* Treat overlong header names as if the full header line was
		   a value. Callers can usually handle large values better than
		   large names. */
		if (colon_pos > MESSAGE_HEADER_NAME_MAX_LEN) {
			line->name = "";
			line->name_len = 0;
			line->middle = uchar_empty_ptr;
			line->middle_len = 0;
			line->value = msg;
			line->value_len = size;
			line->full_value_offset = line->name_offset;
		} else {
			str_truncate(ctx->name, 0);
			/* use buffer_append() so the name won't be truncated if there
			   are NULs. */
			buffer_append(ctx->name, msg, colon_pos);
			str_append_c(ctx->name, '\0');

			/* keep middle stored also in ctx->name so it's available
			   with use_full_value */
			line->middle = msg + colon_pos;
			line->middle_len = (size_t)(line->value - line->middle);
			str_append_data(ctx->name, line->middle, line->middle_len);

			/* Point name and middle at the copies in ctx->name:
			   <name> NUL <middle>. They are taken only after the
			   last append above. */
			line->name = str_c(ctx->name);
			line->name_len = colon_pos;
			line->middle = str_data(ctx->name) + line->name_len + 1;
		}
	}

	if (!line->continued) {
		/* first header line. make a copy of the line since we can't
		   really trust input stream not to lose it. */
		buffer_append(ctx->value_buf, line->value, line->value_len);
		line->value = line->full_value = ctx->value_buf->data;
		line->full_value_len = line->value_len;
	} else if (line->use_full_value) {
		/* continue saving the full value. */
		if (last_no_newline) {
			/* line is longer than fit into our buffer, so we
			   were forced to break it into multiple
			   message_header_lines */
			/* (this branch is also taken whenever CLEAN_ONELINE
			   is set, so no newline is added to the full value
			   in that mode) */
		} else {
			/* re-insert the newline that ended the previous
			   line */
			if (last_crlf)
				buffer_append_c(ctx->value_buf, '\r');
			buffer_append_c(ctx->value_buf, '\n');
		}
		if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0 &&
		    line->value_len > 0 && line->value[0] != ' ' &&
		    IS_LWSP(line->value[0])) {
			/* replace a leading non-space LWSP with a space */
			buffer_append_c(ctx->value_buf, ' ');
			buffer_append(ctx->value_buf,
				      line->value + 1, line->value_len - 1);
		} else {
			buffer_append(ctx->value_buf,
				      line->value, line->value_len);
		}
		line->full_value = ctx->value_buf->data;
		line->full_value_len = ctx->value_buf->used;
	} else {
		/* we didn't want full_value, and this is a continued line. */
		line->full_value = NULL;
		line->full_value_len = 0;
	}

	/* always reset it */
	line->use_full_value = FALSE;

	/* account for and consume the bytes belonging to this line
	   (including its newline, if any) */
	if (ctx->hdr_size != NULL) {
		ctx->hdr_size->physical_size += skip;
		ctx->hdr_size->virtual_size += skip;
	}
	i_stream_skip(ctx->input, skip);

	*hdr_r = line;
	return 1;
}
392
message_parse_header_has_nuls(const struct message_header_parser_ctx * ctx)393 bool message_parse_header_has_nuls(const struct message_header_parser_ctx *ctx)
394 {
395 return ctx->has_nuls;
396 }
397
398 #undef message_parse_header
message_parse_header(struct istream * input,struct message_size * hdr_size,enum message_header_parser_flags flags,message_header_callback_t * callback,void * context)399 void message_parse_header(struct istream *input, struct message_size *hdr_size,
400 enum message_header_parser_flags flags,
401 message_header_callback_t *callback, void *context)
402 {
403 struct message_header_parser_ctx *hdr_ctx;
404 struct message_header_line *hdr;
405 int ret;
406
407 hdr_ctx = message_parse_header_init(input, hdr_size, flags);
408 while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0)
409 callback(hdr, context);
410 i_assert(ret != 0);
411 message_parse_header_deinit(&hdr_ctx);
412
413 /* call after the final skipping */
414 callback(NULL, context);
415 }
416
/* Append the header line hdr to output in its wire form.

   For the first line of a header this is name + middle + value. For a
   continued line only the value is written. The newline is written as
   CRLF or LF depending on hdr->crlf_newline, and left out entirely when
   hdr->no_newline is set.

   NOTE(review): the name length comes from strlen(), so a name containing
   a NUL byte is written only up to that NUL even though hdr->name_len
   would cover all of it - confirm whether that is intended. */
void message_header_line_write(buffer_t *output,
			       const struct message_header_line *hdr)
{
	const bool first_line = !hdr->continued;

	if (first_line) {
		size_t name_bytes = strlen(hdr->name);

		buffer_append(output, hdr->name, name_bytes);
		buffer_append(output, hdr->middle, hdr->middle_len);
	}
	buffer_append(output, hdr->value, hdr->value_len);

	/* the line was returned without its newline */
	if (hdr->no_newline)
		return;

	if (hdr->crlf_newline)
		buffer_append_c(output, '\r');
	buffer_append_c(output, '\n');
}
431
432 const char *
message_header_strdup(pool_t pool,const unsigned char * data,size_t size)433 message_header_strdup(pool_t pool, const unsigned char *data, size_t size)
434 {
435 if (memchr(data, '\0', size) == NULL) {
436 /* fast path */
437 char *dest = p_malloc(pool, size+1);
438 memcpy(dest, data, size);
439 return dest;
440 }
441
442 /* slow path - this could be made faster, but it should be
443 rare so keep it simple */
444 string_t *str = str_new(pool, size+2);
445 for (size_t i = 0; i < size; i++) {
446 if (data[i] != '\0')
447 str_append_c(str, data[i]);
448 else
449 str_append(str, UNICODE_REPLACEMENT_CHAR_UTF8);
450 }
451 return str_c(str);
452 }
453
/* Check that every character of name is allowed in a header field name:

     field-name      =   1*ftext

     ftext           =   %d33-57 /          ; Printable US-ASCII
                         %d59-126           ;  characters not including
                                            ;  ":".

   Returns TRUE when all bytes are ftext, FALSE at the first byte that
   isn't (space, control characters, ':', DEL and anything >= 127).

   NOTE(review): an empty name is accepted, although the grammar above
   requires at least one character - confirm that callers rely on this. */
bool message_header_name_is_valid(const char *name)
{
	const unsigned char *p = (const unsigned char *)name;

	/* walk over the valid prefix: the ranges on both sides of ':' (58) */
	while ((*p >= 33 && *p <= 57) || (*p >= 59 && *p <= 126))
		p++;

	/* valid only if nothing stopped us before the terminating NUL */
	return *p == '\0';
}
475