1 /*++
2 /* NAME
3 /* mime_state 3
4 /* SUMMARY
5 /* MIME parser state machine
6 /* SYNOPSIS
7 /* #include <mime_state.h>
8 /*
9 /* MIME_STATE *mime_state_alloc(flags, head_out, head_end,
10 /* body_out, body_end,
11 /* err_print, context)
12 /* int flags;
13 /* void (*head_out)(void *ptr, int header_class,
14 /* const HEADER_OPTS *header_info,
15 /* VSTRING *buf, off_t offset);
16 /* void (*head_end)(void *ptr);
17 /* void (*body_out)(void *ptr, int rec_type,
18 /* const char *buf, ssize_t len,
19 /* off_t offset);
20 /* void (*body_end)(void *ptr);
/* void (*err_print)(void *ptr, int err_flag, const char *text,
/* ssize_t len);
22 /* void *context;
23 /*
24 /* int mime_state_update(state, rec_type, buf, len)
25 /* MIME_STATE *state;
26 /* int rec_type;
27 /* const char *buf;
28 /* ssize_t len;
29 /*
30 /* MIME_STATE *mime_state_free(state)
31 /* MIME_STATE *state;
32 /*
33 /* const char *mime_state_error(error_code)
34 /* int error_code;
35 /*
36 /* typedef struct {
37 /* .in +4
38 /* const int code; /* internal error code */
39 /* const char *dsn; /* RFC 3463 */
40 /* const char *text; /* descriptive text */
41 /* .in -4
42 /* } MIME_STATE_DETAIL;
43 /*
44 /* const MIME_STATE_DETAIL *mime_state_detail(error_code)
45 /* int error_code;
46 /* DESCRIPTION
47 /* This module implements a one-pass MIME processor with optional
48 /* 8-bit to quoted-printable conversion.
49 /*
50 /* In order to fend off denial of service attacks, message headers
51 /* are truncated at or above var_header_limit bytes, message boundary
52 /* strings are truncated at var_mime_bound_len bytes, and the multipart
53 /* nesting level is limited to var_mime_maxdepth levels.
54 /*
55 /* mime_state_alloc() creates a MIME state machine. The machine
56 /* is delivered in its initial state, expecting content type
57 /* text/plain, 7-bit data.
58 /*
59 /* mime_state_update() updates the MIME state machine according
60 /* to the input record type and the record content.
61 /* The result value is the bit-wise OR of zero or more of the following:
62 /* .IP MIME_ERR_TRUNC_HEADER
63 /* A message header was longer than var_header_limit bytes.
64 /* .IP MIME_ERR_NESTING
65 /* The MIME structure was nested more than var_mime_maxdepth levels.
66 /* .IP MIME_ERR_8BIT_IN_HEADER
67 /* A message header contains 8-bit data. This is always illegal.
68 /* .IP MIME_ERR_8BIT_IN_7BIT_BODY
69 /* A MIME header specifies (or defaults to) 7-bit content, but the
70 /* corresponding message body or body parts contain 8-bit content.
71 /* .IP MIME_ERR_ENCODING_DOMAIN
72 /* An entity of type "message" or "multipart" specifies the wrong
73 /* content transfer encoding domain, or specifies a transformation
74 /* (quoted-printable, base64) instead of a domain (7bit, 8bit,
75 /* or binary).
76 /* .PP
77 /* mime_state_free() releases storage for a MIME state machine,
78 /* and conveniently returns a null pointer.
79 /*
80 /* mime_state_error() returns a string representation for the
81 /* specified error code. When multiple errors are specified it
82 /* reports what it deems the most serious one.
83 /*
84 /* mime_state_detail() returns a table entry with error
85 /* information for the specified error code. When multiple
86 /* errors are specified it reports what it deems the most
87 /* serious one.
88 /*
89 /* Arguments:
90 /* .IP body_out
91 /* The output routine for body lines. It receives unmodified input
92 /* records, or the result of 8-bit -> 7-bit conversion.
93 /* .IP body_end
94 /* A null pointer, or a pointer to a routine that is called after
95 /* the last input record is processed.
96 /* .IP buf
97 /* Buffer with the content of a logical or physical message record.
98 /* .IP context
99 /* Caller context that is passed on to the head_out and body_out
100 /* routines.
101 /* .IP enc_type
102 /* The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT.
103 /* .IP err_print
/* Null pointer, or pointer to a function that is called with
/* arguments: the application context, the error type, the
/* offending input, and the length of the offending input. Only
/* one instance per error type is reported.
107 /* .IP flags
108 /* Special processing options. Specify the bit-wise OR of zero or
109 /* more of the following:
110 /* .RS
111 /* .IP MIME_OPT_DISABLE_MIME
112 /* Pay no attention to Content-* message headers, and switch to
113 /* message body state at the end of the primary message headers.
114 /* .IP MIME_OPT_REPORT_TRUNC_HEADER
115 /* Report errors that set the MIME_ERR_TRUNC_HEADER error flag
116 /* (see above).
117 /* .IP MIME_OPT_REPORT_8BIT_IN_HEADER
118 /* Report errors that set the MIME_ERR_8BIT_IN_HEADER error
119 /* flag (see above). This rarely stops legitimate mail.
120 /* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
121 /* Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error
122 /* flag (see above). This currently breaks Majordomo mail that is
123 /* forwarded for approval, because Majordomo does not propagate
124 /* MIME type information from the enclosed message to the message
125 /* headers of the request for approval.
126 /* .IP MIME_OPT_REPORT_ENCODING_DOMAIN
127 /* Report errors that set the MIME_ERR_ENCODING_DOMAIN error
128 /* flag (see above).
129 /* .IP MIME_OPT_REPORT_NESTING
130 /* Report errors that set the MIME_ERR_NESTING error flag
131 /* (see above).
132 /* .IP MIME_OPT_DOWNGRADE
133 /* Transform content that claims to be 8-bit into quoted-printable.
134 /* Where appropriate, update Content-Transfer-Encoding: message
135 /* headers.
136 /* .RE
137 /* .sp
138 /* For convenience, MIME_OPT_NONE requests no special processing.
139 /* .IP header_class
140 /* Specifies where a message header is located.
141 /* .RS
142 /* .IP MIME_HDR_PRIMARY
143 /* In the primary message header section.
144 /* .IP MIME_HDR_MULTIPART
145 /* In the header section after a multipart boundary string.
146 /* .IP MIME_HDR_NESTED
147 /* At the start of a nested (e.g., message/rfc822) message.
148 /* .RE
149 /* .sp
150 /* For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST
151 /* specify the range of MIME_HDR_MUMBLE macros.
152 /* .sp
153 /* To find out if something is a MIME header at the beginning
154 /* of an RFC 822 message or an attached message, look at the
155 /* header_info argument.
156 /* .IP header_info
157 /* Null pointer or information about the message header, see
158 /* header_opts(3).
159 /* .IP head_out
160 /* The output routine that is invoked for outputting a message header.
161 /* A multi-line header is passed as one chunk of text with embedded
162 /* newlines.
163 /* It is the responsibility of the output routine to break the text
164 /* at embedded newlines, and to break up long text between newlines
165 /* into multiple output records.
166 /* Note: an output routine is explicitly allowed to modify the text.
167 /* .IP head_end
168 /* A null pointer, or a pointer to a routine that is called after
169 /* the last message header in the first header block is processed.
170 /* .IP len
171 /* Length of non-VSTRING input buffer.
172 /* .IP offset
173 /* The offset in bytes from the start of the current block of message
174 /* headers or body lines. Line boundaries are counted as one byte.
175 /* .IP rec_type
176 /* The input record type as defined in rec_type(3h). State is
177 /* updated for text records (REC_TYPE_NORM or REC_TYPE_CONT).
178 /* Some input records are stored internally in order to reconstruct
179 /* multi-line input. Upon receipt of any non-text record type, all
180 /* stored input is flushed and the state is set to "body".
181 /* .IP state
182 /* MIME parser state created with mime_state_alloc().
183 /* BUGS
184 /* NOTE: when the end of headers is reached, mime_state_update()
185 /* may execute up to three call-backs before returning to the
186 /* caller: head_out(), head_end(), and body_out() or body_end().
187 /* As long as call-backs return no result, it is up to the
188 /* call-back routines to check if a previous call-back experienced
189 /* an error.
190 /*
191 /* Different mail user agents treat malformed message boundary
192 /* strings in different ways. The Postfix MIME processor cannot
193 /* be bug-compatible with everything.
194 /*
195 /* This module will not glue together multipart boundary strings that
196 /* span multiple input records.
197 /*
198 /* This module will not glue together RFC 2231 formatted (boundary)
199 /* parameter values. RFC 2231 claims compatibility with existing
200 /* MIME processors. Splitting boundary strings is not backwards
201 /* compatible.
202 /*
203 /* The "8-bit data inside 7-bit body" test is myopic. It is not aware
204 /* of any enclosing (message or multipart) encoding information.
205 /*
206 /* If the input ends in data other than a hard line break, this module
207 /* will add a hard line break of its own. No line break is added to
208 /* empty input.
209 /*
210 /* This code recognizes the obsolete form "headername :" but will
211 /* normalize it to the canonical form "headername:". Leaving the
212 /* obsolete form alone would cause too much trouble with existing code
213 /* that expects only the normalized form.
214 /* SEE ALSO
215 /* msg(3) diagnostics interface
216 /* header_opts(3) header information lookup
217 /* RFC 822 (ARPA Internet Text Messages)
218 /* RFC 2045 (MIME: Format of internet message bodies)
219 /* RFC 2046 (MIME: Media types)
220 /* DIAGNOSTICS
221 /* Fatal errors: memory allocation problem.
222 /* LICENSE
223 /* .ad
224 /* .fi
225 /* The Secure Mailer license must be distributed with this software.
226 /* HISTORY
227 /* .ad
228 /* .fi
229 /* This code was implemented from scratch after reading the RFC
230 /* documents. This was a relatively straightforward effort with
231 /* few if any surprises. Victor Duchovni of Morgan Stanley shared
232 /* his experiences with ambiguities in real-life MIME implementations.
233 /* Liviu Daia of the Romanian Academy shared his insights in some
234 /* of the darker corners.
235 /* AUTHOR(S)
236 /* Wietse Venema
237 /* IBM T.J. Watson Research
238 /* P.O. Box 704
239 /* Yorktown Heights, NY 10598, USA
240 /*
241 /* Wietse Venema
242 /* Google, Inc.
243 /* 111 8th Avenue
244 /* New York, NY 10011, USA
245 /*--*/
246
247 /* System library. */
248
249 #include <sys_defs.h>
250 #include <stdarg.h>
251 #include <ctype.h>
252 #include <string.h>
253
254 #ifdef STRCASECMP_IN_STRINGS_H
255 #include <strings.h>
256 #endif
257
258 /* Utility library. */
259
260 #include <mymalloc.h>
261 #include <msg.h>
262 #include <vstring.h>
263
264 /* Global library. */
265
266 #include <rec_type.h>
267 #include <is_header.h>
268 #include <header_opts.h>
269 #include <mail_params.h>
270 #include <header_token.h>
271 #include <lex_822.h>
272 #include <mime_state.h>
273
274 /* Application-specific. */
275
/*
 * Mime parser stack element for multipart content. One element is pushed
 * by mime_state_push() for each boundary string that is being tracked, and
 * removed again by mime_state_pop(). Elements form a singly-linked list
 * with the most recently pushed element at the head.
 */
typedef struct MIME_STACK {
    int     def_ctype;			/* default content type */
    int     def_stype;			/* default content subtype */
    char   *boundary;			/* boundary string, private copy that
					 * may have been truncated */
    ssize_t bound_len;			/* boundary length, after truncation */
    struct MIME_STACK *next;		/* linkage */
} MIME_STACK;
286
/*
 * Mime parser state.
 *
 * MIME_MAX_TOKEN is the size of the token array that is handed to
 * header_token() when a Content-Type attribute is parsed.
 */
#define MIME_MAX_TOKEN		3	/* tokens per attribute */

struct MIME_STATE {

    /*
     * Volatile members. These are updated while input is processed.
     */
    int     curr_state;			/* header/body state */
    int     curr_ctype;			/* last or default content type */
    int     curr_stype;			/* last or default content subtype */
    int     curr_encoding;		/* last or default content encoding */
    int     curr_domain;		/* last or default encoding unit */
    VSTRING *output_buffer;		/* headers, quoted-printable body */
    int     prev_rec_type;		/* previous input record type */
    int     nesting_level;		/* safety; starts at -1, plus one for
					 * each pushed boundary */
    MIME_STACK *stack;			/* for composite types */
    HEADER_TOKEN token[MIME_MAX_TOKEN];	/* header token array */
    VSTRING *token_buffer;		/* header parser scratch buffer */
    int     err_flags;			/* processing errors */
    off_t   head_offset;		/* offset in header block */
    off_t   body_offset;		/* offset in body block */

    /*
     * Static members. These are copied from the mime_state_alloc()
     * arguments.
     */
    int     static_flags;		/* static processing options */
    MIME_STATE_HEAD_OUT head_out;	/* header output routine */
    MIME_STATE_ANY_END head_end;	/* end of primary header routine */
    MIME_STATE_BODY_OUT body_out;	/* body output routine */
    MIME_STATE_ANY_END body_end;	/* end of body output routine */
    MIME_STATE_ERR_PRINT err_print;	/* error report */
    void   *app_context;		/* application context */
};
323
/*
 * Content types and subtypes that we care about, either because we have to,
 * or because we want to filter out broken MIME messages. The values are
 * stored in the curr_ctype/curr_stype members of the parser state and in
 * the def_ctype/def_stype members of a stack element.
 */
#define MIME_CTYPE_OTHER	0
#define MIME_CTYPE_TEXT		1
#define MIME_CTYPE_MESSAGE	2
#define MIME_CTYPE_MULTIPART	3

#define MIME_STYPE_OTHER	0
#define MIME_STYPE_PLAIN	1
#define MIME_STYPE_RFC822	2
#define MIME_STYPE_PARTIAL	3
#define MIME_STYPE_EXTERN_BODY	4
#define MIME_STYPE_GLOBAL	5

/*
 * MIME parser states. We steal from the public interface: the three header
 * states reuse the public header class codes, so that curr_state can be
 * passed to the head_out call-back as the header class. The body state is
 * the only state that has no public header class equivalent.
 */
#define MIME_STATE_PRIMARY	MIME_HDR_PRIMARY	/* primary headers */
#define MIME_STATE_MULTIPART	MIME_HDR_MULTIPART	/* after --boundary */
#define MIME_STATE_NESTED	MIME_HDR_NESTED	/* message/rfc822 */
#define MIME_STATE_BODY		(MIME_HDR_NESTED + 1)
347
/*
 * SET_CURR_STATE - enter a different header/body state, and restart the
 * offset counter of the kind of segment that is entered: body_offset when
 * entering the body state, head_offset otherwise.
 */
#define SET_CURR_STATE(ptr, state) do { \
	(ptr)->curr_state = (state); \
	if ((state) != MIME_STATE_BODY) \
	    (ptr)->head_offset = 0; \
	else \
	    (ptr)->body_offset = 0; \
    } while (0)

/*
 * SET_MIME_STATE - same as SET_CURR_STATE, and additionally replace the
 * current content type, subtype, encoding and encoding domain.
 */
#define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \
	SET_CURR_STATE((ptr), (state)); \
	(ptr)->curr_ctype = (ctype); \
	(ptr)->curr_stype = (stype); \
	(ptr)->curr_encoding = (encoding); \
	(ptr)->curr_domain = (domain); \
    } while (0)
367
/*
 * MIME encodings and domains. The same numerical codes are used for both,
 * on purpose: a table entry whose encoding equals its domain names a pure
 * domain (7bit, 8bit, binary), while an entry whose encoding differs from
 * its domain names a transformation (base64, quoted-printable). The latter
 * is illegal for multipart/any and message/any.
 */
typedef struct MIME_ENCODING {
    const char *name;			/* external representation */
    int     encoding;			/* internal representation */
    int     domain;			/* subset of encoding */
} MIME_ENCODING;

#define MIME_ENC_QP		1	/* encoding + domain */
#define MIME_ENC_BASE64		2	/* encoding + domain */

/* These are defined in mime_state.h as part of the external interface. */
#ifndef MIME_ENC_7BIT
#define MIME_ENC_7BIT		7	/* domain only */
#define MIME_ENC_8BIT		8	/* domain only */
#define MIME_ENC_BINARY		9	/* domain only */
#endif

/* RFC 2045 encoding names; the list ends with a null name. */
static const MIME_ENCODING mime_encoding_map[] = {
    {"7bit", MIME_ENC_7BIT, MIME_ENC_7BIT},	/* domain */
    {"8bit", MIME_ENC_8BIT, MIME_ENC_8BIT},	/* domain */
    {"binary", MIME_ENC_BINARY, MIME_ENC_BINARY},	/* domain */
    {"base64", MIME_ENC_BASE64, MIME_ENC_7BIT},	/* encoding */
    {"quoted-printable", MIME_ENC_QP, MIME_ENC_7BIT},	/* encoding */
    {0, 0, 0},				/* terminator */
};
397
/*
 * Silly Little Macros. Short-hand notation for the VSTRING string, length
 * and end-of-string accessors, and a cast that makes it possible to
 * inspect text as unsigned bytes.
 */
#define STR(x)		vstring_str(x)
#define LEN(x)		VSTRING_LEN(x)
#define END(x)		vstring_end(x)
#define CU_CHAR_PTR(x)	((const unsigned char *) (x))
405
/*
 * REPORT_ERROR_LEN - record an error of the specified type. The optional
 * err_print call-back is invoked only the first time that an error type is
 * seen; after that the corresponding err_flags bits remain set and further
 * reports of the same type are ignored.
 *
 * All macro arguments are parenthesized, so that an err_type argument such
 * as (A | B) is tested and stored as one unit.
 */
#define REPORT_ERROR_LEN(state, err_type, text, len) do { \
	if (((state)->err_flags & (err_type)) == 0) { \
	    if ((state)->err_print != 0) \
		(state)->err_print((state)->app_context, (err_type), \
				   (text), (len)); \
	    (state)->err_flags |= (err_type); \
	} \
    } while (0)

/*
 * REPORT_ERROR - same as REPORT_ERROR_LEN, for null-terminated text. The
 * text argument is evaluated exactly once.
 */
#define REPORT_ERROR(state, err_type, text) do { \
	const char *_text = (text); \
	ssize_t _len = strlen(_text); \
	REPORT_ERROR_LEN((state), (err_type), _text, _len); \
    } while (0)

/*
 * REPORT_ERROR_BUF - same as REPORT_ERROR_LEN, for VSTRING content.
 */
#define REPORT_ERROR_BUF(state, err_type, buf) \
    REPORT_ERROR_LEN((state), (err_type), STR(buf), LEN(buf))
422
423
/*
 * Outputs and state changes are interleaved, so we must maintain separate
 * offsets for header and body segments.
 *
 * HEAD_OUT passes the content of output_buffer, together with the current
 * state as the header class, to the optional head_out call-back. BODY_OUT
 * passes the specified text to the optional body_out call-back. After the
 * call-back returns, the corresponding offset advances by len + 1, i.e. the
 * line boundary counts as one byte. Nothing happens, and no offset is
 * updated, when the call-back is not specified.
 *
 * Note: the len argument is supplied by the caller; HEAD_OUT does not
 * derive it from output_buffer. BODY_OUT evaluates its len argument twice,
 * the second time after the call-back has returned.
 */
#define HEAD_OUT(ptr, info, len) do { \
	if ((ptr)->head_out) { \
	    (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \
			    (info), (ptr)->output_buffer, (ptr)->head_offset); \
	    (ptr)->head_offset += (len) + 1; \
	} \
    } while(0)

#define BODY_OUT(ptr, rec_type, text, len) do { \
	if ((ptr)->body_out) { \
	    (ptr)->body_out((ptr)->app_context, (rec_type), \
			    (text), (len), (ptr)->body_offset); \
	    (ptr)->body_offset += (len) + 1; \
	} \
    } while(0)
443
444 /* mime_state_push - push boundary onto stack */
445
mime_state_push(MIME_STATE * state,int def_ctype,int def_stype,const char * boundary)446 static void mime_state_push(MIME_STATE *state, int def_ctype, int def_stype,
447 const char *boundary)
448 {
449 MIME_STACK *stack;
450
451 /*
452 * RFC 2046 mandates that a boundary string be up to 70 characters long.
453 * Some MTAs, including Postfix, include the fully-qualified MTA name
454 * which can be longer, so we are willing to handle boundary strings that
455 * exceed the RFC specification. We allow for message headers of up to
456 * var_header_limit characters. In order to avoid denial of service, we
457 * have to impose a configurable limit on the amount of text that we are
458 * willing to store as a boundary string. Despite this truncation way we
459 * will still correctly detect all intermediate boundaries and all the
460 * message headers that follow those boundaries.
461 */
462 state->nesting_level += 1;
463 stack = (MIME_STACK *) mymalloc(sizeof(*stack));
464 stack->def_ctype = def_ctype;
465 stack->def_stype = def_stype;
466 if ((stack->bound_len = strlen(boundary)) > var_mime_bound_len)
467 stack->bound_len = var_mime_bound_len;
468 stack->boundary = mystrndup(boundary, stack->bound_len);
469 stack->next = state->stack;
470 state->stack = stack;
471 if (msg_verbose)
472 msg_info("PUSH boundary %s", stack->boundary);
473 }
474
475 /* mime_state_pop - pop boundary from stack */
476
mime_state_pop(MIME_STATE * state)477 static void mime_state_pop(MIME_STATE *state)
478 {
479 MIME_STACK *stack;
480
481 if ((stack = state->stack) == 0)
482 msg_panic("mime_state_pop: there is no stack");
483 if (msg_verbose)
484 msg_info("POP boundary %s", stack->boundary);
485 state->nesting_level -= 1;
486 state->stack = stack->next;
487 myfree(stack->boundary);
488 myfree((void *) stack);
489 }
490
491 /* mime_state_alloc - create MIME state machine */
492
mime_state_alloc(int flags,MIME_STATE_HEAD_OUT head_out,MIME_STATE_ANY_END head_end,MIME_STATE_BODY_OUT body_out,MIME_STATE_ANY_END body_end,MIME_STATE_ERR_PRINT err_print,void * context)493 MIME_STATE *mime_state_alloc(int flags,
494 MIME_STATE_HEAD_OUT head_out,
495 MIME_STATE_ANY_END head_end,
496 MIME_STATE_BODY_OUT body_out,
497 MIME_STATE_ANY_END body_end,
498 MIME_STATE_ERR_PRINT err_print,
499 void *context)
500 {
501 MIME_STATE *state;
502
503 state = (MIME_STATE *) mymalloc(sizeof(*state));
504
505 /* Volatile members. */
506 state->err_flags = 0;
507 state->body_offset = 0; /* XXX */
508 SET_MIME_STATE(state, MIME_STATE_PRIMARY,
509 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
510 MIME_ENC_7BIT, MIME_ENC_7BIT);
511 state->output_buffer = vstring_alloc(100);
512 state->prev_rec_type = 0;
513 state->stack = 0;
514 state->token_buffer = vstring_alloc(1);
515 state->nesting_level = -1; /* BC Fix 20170512 */
516
517 /* Static members. */
518 state->static_flags = flags;
519 state->head_out = head_out;
520 state->head_end = head_end;
521 state->body_out = body_out;
522 state->body_end = body_end;
523 state->err_print = err_print;
524 state->app_context = context;
525 return (state);
526 }
527
528 /* mime_state_free - destroy MIME state machine */
529
mime_state_free(MIME_STATE * state)530 MIME_STATE *mime_state_free(MIME_STATE *state)
531 {
532 vstring_free(state->output_buffer);
533 while (state->stack)
534 mime_state_pop(state);
535 if (state->token_buffer)
536 vstring_free(state->token_buffer);
537 myfree((void *) state);
538 return (0);
539 }
540
541 /* mime_state_content_type - process content-type header */
542
mime_state_content_type(MIME_STATE * state,const HEADER_OPTS * header_info)543 static void mime_state_content_type(MIME_STATE *state,
544 const HEADER_OPTS *header_info)
545 {
546 const char *cp;
547 ssize_t tok_count;
548 int def_ctype;
549 int def_stype;
550
551 #define TOKEN_MATCH(tok, text) \
552 ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0)
553
554 #define RFC2045_TSPECIALS "()<>@,;:\\\"/[]?="
555
556 #define PARSE_CONTENT_TYPE_HEADER(state, ptr) \
557 header_token(state->token, MIME_MAX_TOKEN, \
558 state->token_buffer, ptr, RFC2045_TSPECIALS, ';')
559
560 cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
561 if ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) > 0) {
562
563 /*
564 * text/whatever. Right now we don't really care if it is plain or
565 * not, but we may want to recognize subtypes later, and then this
566 * code can serve as an example.
567 */
568 if (TOKEN_MATCH(state->token[0], "text")) {
569 state->curr_ctype = MIME_CTYPE_TEXT;
570 if (tok_count >= 3
571 && state->token[1].type == '/'
572 && TOKEN_MATCH(state->token[2], "plain"))
573 state->curr_stype = MIME_STYPE_PLAIN;
574 else
575 state->curr_stype = MIME_STYPE_OTHER;
576 return;
577 }
578
579 /*
580 * message/whatever body parts start with another block of message
581 * headers that we may want to look at. The partial and external-body
582 * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we
583 * must properly recognize them.
584 */
585 if (TOKEN_MATCH(state->token[0], "message")) {
586 state->curr_ctype = MIME_CTYPE_MESSAGE;
587 state->curr_stype = MIME_STYPE_OTHER;
588 if (tok_count >= 3
589 && state->token[1].type == '/') {
590 if (TOKEN_MATCH(state->token[2], "rfc822"))
591 state->curr_stype = MIME_STYPE_RFC822;
592 else if (TOKEN_MATCH(state->token[2], "partial"))
593 state->curr_stype = MIME_STYPE_PARTIAL;
594 else if (TOKEN_MATCH(state->token[2], "external-body"))
595 state->curr_stype = MIME_STYPE_EXTERN_BODY;
596 else if (TOKEN_MATCH(state->token[2], "global"))
597 state->curr_stype = MIME_STYPE_GLOBAL;
598 }
599 return;
600 }
601
602 /*
603 * multipart/digest has default content type message/rfc822,
604 * multipart/whatever has default content type text/plain.
605 */
606 if (TOKEN_MATCH(state->token[0], "multipart")) {
607 state->curr_ctype = MIME_CTYPE_MULTIPART;
608 if (tok_count >= 3
609 && state->token[1].type == '/'
610 && TOKEN_MATCH(state->token[2], "digest")) {
611 def_ctype = MIME_CTYPE_MESSAGE;
612 def_stype = MIME_STYPE_RFC822;
613 } else {
614 def_ctype = MIME_CTYPE_TEXT;
615 def_stype = MIME_STYPE_PLAIN;
616 }
617
618 /*
619 * Yes, this is supposed to capture multiple boundary strings,
620 * which are illegal and which could be used to hide content in
621 * an implementation dependent manner. The code below allows us
622 * to find embedded message headers as long as the sender uses
623 * only one of these same-level boundary strings.
624 *
625 * Yes, this is supposed to ignore the boundary value type.
626 */
627 while ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) >= 0) {
628 if (tok_count >= 3
629 && TOKEN_MATCH(state->token[0], "boundary")
630 && state->token[1].type == '=') {
631 if (state->nesting_level > var_mime_maxdepth) {
632 if (state->static_flags & MIME_OPT_REPORT_NESTING)
633 REPORT_ERROR_BUF(state, MIME_ERR_NESTING,
634 state->output_buffer);
635 } else {
636 mime_state_push(state, def_ctype, def_stype,
637 state->token[2].u.value);
638 }
639 }
640 }
641 }
642 return;
643 }
644
645 /*
646 * other/whatever.
647 */
648 else {
649 state->curr_ctype = MIME_CTYPE_OTHER;
650 return;
651 }
652 }
653
654 /* mime_state_content_encoding - process content-transfer-encoding header */
655
mime_state_content_encoding(MIME_STATE * state,const HEADER_OPTS * header_info)656 static void mime_state_content_encoding(MIME_STATE *state,
657 const HEADER_OPTS *header_info)
658 {
659 const char *cp;
660 const MIME_ENCODING *cmp;
661
662 #define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \
663 header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0)
664
665 /*
666 * Do content-transfer-encoding header. Never set the encoding domain to
667 * something other than 7bit, 8bit or binary, even if we don't recognize
668 * the input.
669 */
670 cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
671 if (PARSE_CONTENT_ENCODING_HEADER(state, &cp) > 0
672 && state->token[0].type == HEADER_TOK_TOKEN) {
673 for (cmp = mime_encoding_map; cmp->name != 0; cmp++) {
674 if (strcasecmp(state->token[0].u.value, cmp->name) == 0) {
675 state->curr_encoding = cmp->encoding;
676 state->curr_domain = cmp->domain;
677 break;
678 }
679 }
680 }
681 }
682
683 /* mime_state_enc_name - encoding to printable form */
684
mime_state_enc_name(int encoding)685 static const char *mime_state_enc_name(int encoding)
686 {
687 const MIME_ENCODING *cmp;
688
689 for (cmp = mime_encoding_map; cmp->name != 0; cmp++)
690 if (encoding == cmp->encoding)
691 return (cmp->name);
692 return ("unknown");
693 }
694
695 /* mime_state_downgrade - convert 8-bit data to quoted-printable */
696
mime_state_downgrade(MIME_STATE * state,int rec_type,const char * text,ssize_t len)697 static void mime_state_downgrade(MIME_STATE *state, int rec_type,
698 const char *text, ssize_t len)
699 {
700 static char hexchars[] = "0123456789ABCDEF";
701 const unsigned char *cp;
702 int ch;
703
704 #define QP_ENCODE(buffer, ch) { \
705 VSTRING_ADDCH(buffer, '='); \
706 VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \
707 VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \
708 }
709
710 /*
711 * Insert a soft line break when the output reaches a critical length
712 * before we reach a hard line break.
713 */
714 for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) {
715 /* Critical length before hard line break. */
716 if (LEN(state->output_buffer) > 72) {
717 VSTRING_ADDCH(state->output_buffer, '=');
718 VSTRING_TERMINATE(state->output_buffer);
719 BODY_OUT(state, REC_TYPE_NORM,
720 STR(state->output_buffer),
721 LEN(state->output_buffer));
722 VSTRING_RESET(state->output_buffer);
723 }
724 /* Append the next character. */
725 ch = *cp;
726 if ((ch < 32 && ch != '\t') || ch == '=' || ch > 126) {
727 QP_ENCODE(state->output_buffer, ch);
728 } else {
729 VSTRING_ADDCH(state->output_buffer, ch);
730 }
731 }
732
733 /*
734 * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM
735 * record). Fix trailing whitespace as per the RFC: in the worst case,
736 * the output length will grow from 73 characters to 75 characters.
737 */
738 if (rec_type == REC_TYPE_NORM) {
739 if (LEN(state->output_buffer) > 0
740 && ((ch = END(state->output_buffer)[-1]) == ' ' || ch == '\t')) {
741 vstring_truncate(state->output_buffer,
742 LEN(state->output_buffer) - 1);
743 QP_ENCODE(state->output_buffer, ch);
744 }
745 VSTRING_TERMINATE(state->output_buffer);
746 BODY_OUT(state, REC_TYPE_NORM,
747 STR(state->output_buffer),
748 LEN(state->output_buffer));
749 VSTRING_RESET(state->output_buffer);
750 }
751 }
752
753 /* mime_state_update - update MIME state machine */
754
mime_state_update(MIME_STATE * state,int rec_type,const char * text,ssize_t len)755 int mime_state_update(MIME_STATE *state, int rec_type,
756 const char *text, ssize_t len)
757 {
758 int input_is_text = (rec_type == REC_TYPE_NORM
759 || rec_type == REC_TYPE_CONT);
760 MIME_STACK *sp;
761 const HEADER_OPTS *header_info;
762 const unsigned char *cp;
763
764 #define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \
765 state->prev_rec_type = rec_type; \
766 return (state->err_flags); \
767 } while (0)
768
769 /*
770 * Be sure to flush any partial output line that might still be buffered
771 * up before taking any other "end of input" actions.
772 */
773 if (!input_is_text && state->prev_rec_type == REC_TYPE_CONT)
774 mime_state_update(state, REC_TYPE_NORM, "", 0);
775
776 /*
777 * This message state machine is kept simple for the sake of robustness.
778 * Standards evolve over time, and we want to be able to correctly
779 * process messages that are not yet defined. This state machine knows
780 * about headers and bodies, understands that multipart/whatever has
781 * multiple body parts with a header and body, and that message/whatever
782 * has message headers at the start of a body part.
783 */
784 switch (state->curr_state) {
785
786 /*
787 * First, deal with header information that we have accumulated from
788 * previous input records. Discard text that does not fit in a header
789 * buffer. Our limit is quite generous; Sendmail will refuse mail
790 * with only 32kbyte in all the message headers combined.
791 */
792 case MIME_STATE_PRIMARY:
793 case MIME_STATE_MULTIPART:
794 case MIME_STATE_NESTED:
795 if (LEN(state->output_buffer) > 0) {
796 if (input_is_text) {
797 if (state->prev_rec_type == REC_TYPE_CONT) {
798 if (LEN(state->output_buffer) < var_header_limit) {
799 vstring_strncat(state->output_buffer, text, len);
800 } else {
801 if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
802 REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
803 state->output_buffer);
804 }
805 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
806 }
807 if (IS_SPACE_TAB(*text)) {
808 if (LEN(state->output_buffer) < var_header_limit) {
809 vstring_strcat(state->output_buffer, "\n");
810 vstring_strncat(state->output_buffer, text, len);
811 } else {
812 if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
813 REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
814 state->output_buffer);
815 }
816 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
817 }
818 }
819
820 /*
821 * The input is (the beginning of) another message header, or is
822 * not a message header, or is not even a text record. With no
823 * more input to append to this saved header, do output
824 * processing and reset the saved header buffer. Hold on to the
825 * content transfer encoding header if we have to do a 8->7
826 * transformation, because the proper information depends on the
827 * content type header: message and multipart require a domain,
828 * leaf entities have either a transformation or a domain.
829 */
830 if (LEN(state->output_buffer) > 0) {
831 header_info = header_opts_find(STR(state->output_buffer));
832 if (!(state->static_flags & MIME_OPT_DISABLE_MIME)
833 && header_info != 0) {
834 if (header_info->type == HDR_CONTENT_TYPE)
835 mime_state_content_type(state, header_info);
836 if (header_info->type == HDR_CONTENT_TRANSFER_ENCODING)
837 mime_state_content_encoding(state, header_info);
838 }
839 if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_HEADER) != 0
840 && (state->err_flags & MIME_ERR_8BIT_IN_HEADER) == 0) {
841 for (cp = CU_CHAR_PTR(STR(state->output_buffer));
842 cp < CU_CHAR_PTR(END(state->output_buffer)); cp++)
843 if (*cp & 0200) {
844 REPORT_ERROR_BUF(state, MIME_ERR_8BIT_IN_HEADER,
845 state->output_buffer);
846 break;
847 }
848 }
849 /* Output routine is explicitly allowed to change the data. */
850 if (header_info == 0
851 || header_info->type != HDR_CONTENT_TRANSFER_ENCODING
852 || (state->static_flags & MIME_OPT_DOWNGRADE) == 0
853 || state->curr_domain == MIME_ENC_7BIT)
854 HEAD_OUT(state, header_info, len);
855 state->prev_rec_type = 0;
856 VSTRING_RESET(state->output_buffer);
857 }
858 }
859
860 /*
861 * With past header information moved out of the way, proceed with a
862 * clean slate.
863 */
864 if (input_is_text) {
865 ssize_t header_len;
866
867 /*
868 * See if this input is (the beginning of) a message header.
869 *
870 * Normalize obsolete "name space colon" syntax to "name colon".
871 * Things would be too confusing otherwise.
872 *
873 * Don't assume that the input is null terminated.
874 */
875 if ((header_len = is_header_buf(text, len)) > 0) {
876 vstring_strncpy(state->output_buffer, text, header_len);
877 for (text += header_len, len -= header_len;
878 len > 0 && IS_SPACE_TAB(*text);
879 text++, len--)
880 /* void */ ;
881 vstring_strncat(state->output_buffer, text, len);
882 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
883 }
884 }
885
886 /*
887 * This input terminates a block of message headers. When converting
888 * 8-bit to 7-bit mail, this is the right place to emit the correct
889 * content-transfer-encoding header. With message or multipart we
890 * specify 7bit, with leaf entities we specify quoted-printable.
891 *
892 * We're not going to convert non-text data into base 64. If they send
893 * arbitrary binary data as 8-bit text, then the data is already
894 * broken beyond recovery, because the Postfix SMTP server sanitizes
895 * record boundaries, treating broken record boundaries as CRLF.
896 *
897 * Clear the output buffer, we will need it for storage of the
898 * conversion result.
899 */
900 if ((state->static_flags & MIME_OPT_DOWNGRADE)
901 && state->curr_domain != MIME_ENC_7BIT) {
902 if ((state->curr_ctype == MIME_CTYPE_MESSAGE
903 && state->curr_stype != MIME_STYPE_GLOBAL)
904 || state->curr_ctype == MIME_CTYPE_MULTIPART)
905 cp = CU_CHAR_PTR("7bit");
906 else
907 cp = CU_CHAR_PTR("quoted-printable");
908 vstring_sprintf(state->output_buffer,
909 "Content-Transfer-Encoding: %s", cp);
910 HEAD_OUT(state, (HEADER_OPTS *) 0, len);
911 VSTRING_RESET(state->output_buffer);
912 }
913
914 /*
915 * This input terminates a block of message headers. Call the
916 * optional header end routine at the end of the first header block.
917 */
918 if (state->curr_state == MIME_STATE_PRIMARY && state->head_end)
919 state->head_end(state->app_context);
920
921 /*
922 * This is the right place to check if the sender specified an
923 * appropriate identity encoding (7bit, 8bit, binary) for multipart
924 * and for message.
925 */
926 if (state->static_flags & MIME_OPT_REPORT_ENCODING_DOMAIN) {
927 if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
928 if (state->curr_stype == MIME_STYPE_PARTIAL
929 || state->curr_stype == MIME_STYPE_EXTERN_BODY) {
930 if (state->curr_domain != MIME_ENC_7BIT)
931 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
932 mime_state_enc_name(state->curr_encoding));
933 }
934 /* EAI: message/global allows non-identity encoding. */
935 else if (state->curr_stype == MIME_STYPE_RFC822) {
936 if (state->curr_encoding != state->curr_domain)
937 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
938 mime_state_enc_name(state->curr_encoding));
939 }
940 } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
941 if (state->curr_encoding != state->curr_domain)
942 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
943 mime_state_enc_name(state->curr_encoding));
944 }
945 }
946
947 /*
948 * Find out if the next body starts with its own message headers. In
949 * aggressive mode, examine headers of partial and external-body
950 * messages. Otherwise, treat such headers as part of the "body". Set
951 * the proper encoding information for the multipart prolog.
952 *
953 * XXX We parse headers inside message/* content even when the encoding
954 * is invalid (encoding != domain). With base64 we won't recognize
955 * any headers, and with quoted-printable we won't recognize MIME
956 * boundary strings, but the MIME processor will still resynchronize
957 * when it runs into the higher-level boundary string at the end of
958 * the message/* content. Although we will treat some headers as body
959 * text, we will still do a better job than if we were treating the
960 * entire message/* content as body text.
961 *
962 * XXX This changes state to MIME_STATE_NESTED and then outputs a body
963 * line, so that the body offset is not properly reset.
964 *
965 * Don't assume that the input is null terminated.
966 */
967 if (input_is_text) {
968 if (len == 0) {
969 state->body_offset = 0; /* XXX */
970 if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
971 if (state->curr_stype == MIME_STYPE_RFC822)
972 SET_MIME_STATE(state, MIME_STATE_NESTED,
973 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
974 MIME_ENC_7BIT, MIME_ENC_7BIT);
975 else if (state->curr_stype == MIME_STYPE_GLOBAL
976 && ((state->static_flags & MIME_OPT_DOWNGRADE) == 0
977 || state->curr_domain == MIME_ENC_7BIT))
978 /* XXX EAI: inspect encoded message/global. */
979 SET_MIME_STATE(state, MIME_STATE_NESTED,
980 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
981 MIME_ENC_7BIT, MIME_ENC_7BIT);
982 else
983 SET_CURR_STATE(state, MIME_STATE_BODY);
984 } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
985 SET_MIME_STATE(state, MIME_STATE_BODY,
986 MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
987 MIME_ENC_7BIT, MIME_ENC_7BIT);
988 } else {
989 SET_CURR_STATE(state, MIME_STATE_BODY);
990 }
991 }
992
993 /*
994 * Invalid input. Force output of one blank line and jump to the
995 * body state, leaving all other state alone.
996 *
997 * We don't break legitimate mail by inserting a blank line
998 * separator between primary headers and a non-empty body. Many
999 * MTA's don't even record the presence or absence of this
1000 * separator, nor does the Milter protocol pass it on to Milter
1001 * applications.
1002 *
1003 * XXX We don't insert a blank line separator into attachments, to
1004 * avoid breaking digital signatures. Postfix shall not do a
1005 * worse mail delivery job than MTAs that can't even parse MIME.
1006 * We switch to body state anyway, to avoid treating body text as
1007 * header text, and mis-interpreting or truncating it. The code
1008 * below for initial From_ lines is for educational purposes.
1009 *
1010 * Sites concerned about MIME evasion can use a MIME normalizer.
1011 * Postfix has a different mission.
1012 */
1013 else {
1014 if (msg_verbose)
1015 msg_info("garbage in %s header",
1016 state->curr_state == MIME_STATE_MULTIPART ? "multipart" :
1017 state->curr_state == MIME_STATE_PRIMARY ? "primary" :
1018 state->curr_state == MIME_STATE_NESTED ? "nested" :
1019 "other");
1020 switch (state->curr_state) {
1021 case MIME_STATE_PRIMARY:
1022 BODY_OUT(state, REC_TYPE_NORM, "", 0);
1023 SET_CURR_STATE(state, MIME_STATE_BODY);
1024 break;
1025 #if 0
1026 case MIME_STATE_NESTED:
1027 if (state->body_offset <= 1
1028 && rec_type == REC_TYPE_NORM
1029 && len > 7
1030 && (strncmp(text + (*text == '>'), "From ", 5) == 0
1031 || strncmp(text, "=46rom ", 7) == 0))
1032 break;
1033 /* FALLTHROUGH */
1034 #endif
1035 default:
1036 SET_CURR_STATE(state, MIME_STATE_BODY);
1037 break;
1038 }
1039 }
1040 }
1041
1042 /*
1043 * This input is not text. Go to body state, unconditionally.
1044 */
1045 else {
1046 SET_CURR_STATE(state, MIME_STATE_BODY);
1047 }
1048 /* FALLTHROUGH */
1049
1050 /*
1051 * Body text. Look for message boundaries, and recover from missing
1052 * boundary strings. Missing boundaries can happen in aggressive mode
1053 * with text/rfc822-headers or with message/partial. Ignore non-space
1054 * cruft after --boundary or --boundary--, because some MUAs do, and
1055 * because only perverse software would take advantage of this to
1056 * escape detection. We have to ignore trailing cruft anyway, because
1057 * our saved copy of the boundary string may have been truncated for
1058 * safety reasons.
1059 *
1060 * Optionally look for 8-bit data in content that was announced as, or
1061 * that defaults to, 7-bit. Unfortunately, we cannot turn this on by
1062 * default. Majordomo sends requests for approval that do not
1063 * propagate the MIME information from the enclosed message to the
1064 * message headers of the approval request.
1065 *
1066 * Set the proper state information after processing a message boundary
1067 * string.
1068 *
1069 * Don't look for boundary strings at the start of a continued record.
1070 *
1071 * Don't assume that the input is null terminated.
1072 */
1073 case MIME_STATE_BODY:
1074 if (input_is_text) {
1075 if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_7BIT_BODY) != 0
1076 && state->curr_encoding == MIME_ENC_7BIT
1077 && (state->err_flags & MIME_ERR_8BIT_IN_7BIT_BODY) == 0) {
1078 for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++)
1079 if (*cp & 0200) {
1080 REPORT_ERROR_LEN(state, MIME_ERR_8BIT_IN_7BIT_BODY,
1081 text, len);
1082 break;
1083 }
1084 }
1085 if (state->stack && state->prev_rec_type != REC_TYPE_CONT
1086 && len > 2 && text[0] == '-' && text[1] == '-') {
1087 for (sp = state->stack; sp != 0; sp = sp->next) {
1088 if (len >= 2 + sp->bound_len &&
1089 strncmp(text + 2, sp->boundary, sp->bound_len) == 0) {
1090 while (sp != state->stack)
1091 mime_state_pop(state);
1092 if (len >= 4 + sp->bound_len &&
1093 strncmp(text + 2 + sp->bound_len, "--", 2) == 0) {
1094 mime_state_pop(state);
1095 SET_MIME_STATE(state, MIME_STATE_BODY,
1096 MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
1097 MIME_ENC_7BIT, MIME_ENC_7BIT);
1098 } else {
1099 SET_MIME_STATE(state, MIME_STATE_MULTIPART,
1100 sp->def_ctype, sp->def_stype,
1101 MIME_ENC_7BIT, MIME_ENC_7BIT);
1102 }
1103 break;
1104 }
1105 }
1106 }
1107 /* Put last for consistency with header output routine. */
1108 if ((state->static_flags & MIME_OPT_DOWNGRADE)
1109 && state->curr_domain != MIME_ENC_7BIT)
1110 mime_state_downgrade(state, rec_type, text, len);
1111 else
1112 BODY_OUT(state, rec_type, text, len);
1113 }
1114
1115 /*
1116 * The input is not a text record. Inform the application that this
1117 * is the last opportunity to send any pending output.
1118 */
1119 else {
1120 if (state->body_end)
1121 state->body_end(state->app_context);
1122 }
1123 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
1124
1125 /*
1126 * Oops. This can't happen.
1127 */
1128 default:
1129 msg_panic("mime_state_update: unknown state: %d", state->curr_state);
1130 }
1131 }
1132
1133 /*
1134 * Mime error to (DSN, text) mapping. Order matters; more serious errors
1135 * must precede less serious errors, because the error-to-text conversion
1136 * can report only one error.
1137 */
1138 static const MIME_STATE_DETAIL mime_err_detail[] = {
1139 MIME_ERR_NESTING, "5.6.0", "MIME nesting exceeds safety limit",
1140 MIME_ERR_TRUNC_HEADER, "5.6.0", "message header length exceeds safety limit",
1141 MIME_ERR_8BIT_IN_HEADER, "5.6.0", "improper use of 8-bit data in message header",
1142 MIME_ERR_8BIT_IN_7BIT_BODY, "5.6.0", "improper use of 8-bit data in message body",
1143 MIME_ERR_ENCODING_DOMAIN, "5.6.0", "invalid message/* or multipart/* encoding domain",
1144 0,
1145 };
1146
1147 /* mime_state_error - error code to string */
1148
mime_state_error(int error_code)1149 const char *mime_state_error(int error_code)
1150 {
1151 const MIME_STATE_DETAIL *mp;
1152
1153 if (error_code == 0)
1154 msg_panic("mime_state_error: there is no error");
1155 for (mp = mime_err_detail; mp->code; mp++)
1156 if (mp->code & error_code)
1157 return (mp->text);
1158 msg_panic("mime_state_error: unknown error code %d", error_code);
1159 }
1160
1161 /* mime_state_detail - error code to table entry with assorted data */
1162
mime_state_detail(int error_code)1163 const MIME_STATE_DETAIL *mime_state_detail(int error_code)
1164 {
1165 const MIME_STATE_DETAIL *mp;
1166
1167 if (error_code == 0)
1168 msg_panic("mime_state_detail: there is no error");
1169 for (mp = mime_err_detail; mp->code; mp++)
1170 if (mp->code & error_code)
1171 return (mp);
1172 msg_panic("mime_state_detail: unknown error code %d", error_code);
1173 }
1174
1175 #ifdef TEST
1176
1177 #include <stdlib.h>
1178 #include <stringops.h>
1179 #include <vstream.h>
1180 #include <msg_vstream.h>
1181 #include <rec_streamlf.h>
1182
1183 /*
1184 * Stress test the REC_TYPE_CONT/NORM handling, but don't break header
1185 * labels.
1186 */
1187 /*#define REC_LEN 40*/
1188
1189 #define REC_LEN 1024
1190
/* head_out - test callback: print one header with its class and offset */

static void head_out(void *context, int class, const HEADER_OPTS *unused_info,
		             VSTRING *buf, off_t offset)
{
    VSTREAM *out = (VSTREAM *) context;
    const char *label;

    /* Translate the header class into the tag shown in the test output. */
    if (class == MIME_HDR_PRIMARY)
        label = "MAIN";
    else if (class == MIME_HDR_MULTIPART)
        label = "MULT";
    else if (class == MIME_HDR_NESTED)
        label = "NEST";
    else
        label = "ERROR";

    vstream_fprintf(out, "%s %ld\t|%s\n", label, (long) offset, STR(buf));
}
1202
head_end(void * context)1203 static void head_end(void *context)
1204 {
1205 VSTREAM *stream = (VSTREAM *) context;
1206
1207 vstream_fprintf(stream, "HEADER END\n");
1208 }
1209
body_out(void * context,int rec_type,const char * buf,ssize_t len,off_t offset)1210 static void body_out(void *context, int rec_type, const char *buf, ssize_t len,
1211 off_t offset)
1212 {
1213 VSTREAM *stream = (VSTREAM *) context;
1214
1215 vstream_fprintf(stream, "BODY %c %ld\t|", rec_type, (long) offset);
1216 vstream_fwrite(stream, buf, len);
1217 if (rec_type == REC_TYPE_NORM)
1218 VSTREAM_PUTC('\n', stream);
1219 }
1220
body_end(void * context)1221 static void body_end(void *context)
1222 {
1223 VSTREAM *stream = (VSTREAM *) context;
1224
1225 vstream_fprintf(stream, "BODY END\n");
1226 }
1227
/* err_print - test callback: log an error with at most 100 bytes of text */

static void err_print(void *unused_context, int err_flag,
		              const char *text, ssize_t len)
{
    int     shown = 100;

    /* The text need not be null terminated; bound it with a precision. */
    if (len < 100)
        shown = (int) len;
    msg_warn("%s: %.*s", mime_state_error(err_flag), shown, text);
}
1234
 /*
  * Test-program definitions of the global settings. According to the module
  * description, message headers are truncated at or above var_header_limit
  * bytes, the multipart nesting level is limited to var_mime_maxdepth
  * levels, and boundary strings are truncated at var_mime_bound_len bytes.
  */
int var_header_limit = 2000;
int var_mime_maxdepth = 20;
int var_mime_bound_len = 2000;

 /*
  * NOTE(review): var_drop_hdrs is not referenced in the visible part of this
  * file; presumably it satisfies a dependency of the header lookup code --
  * confirm.
  */
char *var_drop_hdrs = DEF_DROP_HDRS;
1239
main(int unused_argc,char ** argv)1240 int main(int unused_argc, char **argv)
1241 {
1242 int rec_type;
1243 int last = 0;
1244 VSTRING *buf;
1245 MIME_STATE *state;
1246 int err;
1247
1248 /*
1249 * Initialize.
1250 */
1251 #define MIME_OPTIONS \
1252 (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \
1253 | MIME_OPT_REPORT_8BIT_IN_HEADER \
1254 | MIME_OPT_REPORT_ENCODING_DOMAIN \
1255 | MIME_OPT_REPORT_TRUNC_HEADER \
1256 | MIME_OPT_REPORT_NESTING \
1257 | MIME_OPT_DOWNGRADE)
1258
1259 msg_vstream_init(basename(argv[0]), VSTREAM_OUT);
1260 msg_verbose = 1;
1261 buf = vstring_alloc(10);
1262 state = mime_state_alloc(MIME_OPTIONS,
1263 head_out, head_end,
1264 body_out, body_end,
1265 err_print,
1266 (void *) VSTREAM_OUT);
1267
1268 /*
1269 * Main loop.
1270 */
1271 do {
1272 rec_type = rec_streamlf_get(VSTREAM_IN, buf, REC_LEN);
1273 VSTRING_TERMINATE(buf);
1274 err = mime_state_update(state, last = rec_type, STR(buf), LEN(buf));
1275 vstream_fflush(VSTREAM_OUT);
1276 } while (rec_type > 0);
1277
1278 /*
1279 * Error reporting.
1280 */
1281 if (err & MIME_ERR_TRUNC_HEADER)
1282 msg_warn("message header length exceeds safety limit");
1283 if (err & MIME_ERR_NESTING)
1284 msg_warn("MIME nesting exceeds safety limit");
1285 if (err & MIME_ERR_8BIT_IN_HEADER)
1286 msg_warn("improper use of 8-bit data in message header");
1287 if (err & MIME_ERR_8BIT_IN_7BIT_BODY)
1288 msg_warn("improper use of 8-bit data in message body");
1289 if (err & MIME_ERR_ENCODING_DOMAIN)
1290 msg_warn("improper message/* or multipart/* encoding domain");
1291
1292 /*
1293 * Cleanup.
1294 */
1295 mime_state_free(state);
1296 vstring_free(buf);
1297 exit(0);
1298 }
1299
1300 #endif
1301