1 /*	$NetBSD: cleanup_message.c,v 1.4 2022/10/08 16:12:45 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	cleanup_message 3
6 /* SUMMARY
7 /*	process message segment
8 /* SYNOPSIS
9 /*	#include "cleanup.h"
10 /*
11 /*	void	cleanup_message(state, type, buf, len)
12 /*	CLEANUP_STATE *state;
13 /*	int	type;
14 /*	const char *buf;
15 /*	ssize_t	len;
16 /* DESCRIPTION
17 /*	This module processes message content records and copies the
18 /*	result to the queue file.  It validates the input, rewrites
19 /*	sender/recipient addresses to canonical form, inserts missing
20 /*	message headers, and extracts information from message headers
21 /*	to be used later when generating the extracted output segment.
22 /*	This routine absorbs but does not emit the content to extracted
23 /*	boundary record.
24 /*
25 /*	Arguments:
26 /* .IP state
27 /*	Queue file and message processing state. This state is updated
28 /*	as records are processed and as errors happen.
29 /* .IP type
30 /*	Record type.
31 /* .IP buf
32 /*	Record content.
33 /* .IP len
34 /*	Record content length.
35 /* LICENSE
36 /* .ad
37 /* .fi
38 /*	The Secure Mailer license must be distributed with this software.
39 /* AUTHOR(S)
40 /*	Wietse Venema
41 /*	IBM T.J. Watson Research
42 /*	P.O. Box 704
43 /*	Yorktown Heights, NY 10598, USA
44 /*
45 /*	Wietse Venema
46 /*	Google, Inc.
47 /*	111 8th Avenue
48 /*	New York, NY 10011, USA
49 /*--*/
50 
51 /* System library. */
52 
53 #include <sys_defs.h>
54 #include <ctype.h>
55 #include <string.h>
56 #include <time.h>
57 #include <unistd.h>
58 
59 #ifdef STRCASECMP_IN_STRINGS_H
60 #include <strings.h>
61 #endif
62 
63 /* Utility library. */
64 
65 #include <msg.h>
66 #include <vstring.h>
67 #include <vstream.h>
68 #include <argv.h>
69 #include <split_at.h>
70 #include <mymalloc.h>
71 #include <stringops.h>
72 #include <nvtable.h>
73 
74 /* Global library. */
75 
76 #include <record.h>
77 #include <rec_type.h>
78 #include <cleanup_user.h>
79 #include <tok822.h>
80 #include <lex_822.h>
81 #include <header_opts.h>
82 #include <quote_822_local.h>
83 #include <mail_params.h>
84 #include <mail_date.h>
85 #include <mail_addr.h>
86 #include <is_header.h>
87 #include <ext_prop.h>
88 #include <mail_proto.h>
89 #include <mime_state.h>
90 #include <lex_822.h>
91 #include <dsn_util.h>
92 #include <conv_time.h>
93 #include <info_log_addr_form.h>
94 #include <hfrom_format.h>
95 
96 /* Application-specific. */
97 
98 #include "cleanup.h"
99 
100 /* cleanup_fold_header - wrap address list header */
101 
cleanup_fold_header(CLEANUP_STATE * state,VSTRING * header_buf)102 static void cleanup_fold_header(CLEANUP_STATE *state, VSTRING *header_buf)
103 {
104     char   *start_line = vstring_str(header_buf);
105     char   *end_line;
106     char   *next_line;
107     char   *line;
108 
109     /*
110      * A rewritten address list contains one address per line. The code below
111      * replaces newlines by spaces, to fit as many addresses on a line as
112      * possible (without rearranging the order of addresses). Prepending
113      * white space to the beginning of lines is delegated to the output
114      * routine.
115      */
116     for (line = start_line; line != 0; line = next_line) {
117 	end_line = line + strcspn(line, "\n");
118 	if (line > start_line) {
119 	    if (end_line - start_line < 70) {	/* TAB counts as one */
120 		line[-1] = ' ';
121 	    } else {
122 		start_line = line;
123 	    }
124 	}
125 	next_line = *end_line ? end_line + 1 : 0;
126     }
127     cleanup_out_header(state, header_buf);
128 }
129 
130 /* cleanup_extract_internal - save unquoted copy of extracted address */
131 
cleanup_extract_internal(VSTRING * buffer,TOK822 * addr)132 static char *cleanup_extract_internal(VSTRING *buffer, TOK822 *addr)
133 {
134 
135     /*
136      * A little routine to stash away a copy of an address that we extracted
137      * from a message header line.
138      */
139     tok822_internalize(buffer, addr->head, TOK822_STR_DEFL);
140     return (mystrdup(vstring_str(buffer)));
141 }
142 
143 /* cleanup_rewrite_sender - sender address rewriting */
144 
cleanup_rewrite_sender(CLEANUP_STATE * state,const HEADER_OPTS * hdr_opts,VSTRING * header_buf)145 static void cleanup_rewrite_sender(CLEANUP_STATE *state,
146 				           const HEADER_OPTS *hdr_opts,
147 				           VSTRING *header_buf)
148 {
149     TOK822 *tree;
150     TOK822 **addr_list;
151     TOK822 **tpp;
152     int     did_rewrite = 0;
153 
154     if (msg_verbose)
155 	msg_info("rewrite_sender: %s", hdr_opts->name);
156 
157     /*
158      * Parse the header line, rewrite each address found, and regenerate the
159      * header line. Finally, pipe the result through the header line folding
160      * routine.
161      */
162     tree = tok822_parse_limit(vstring_str(header_buf)
163 			      + strlen(hdr_opts->name) + 1,
164 			      var_token_limit);
165     addr_list = tok822_grep(tree, TOK822_ADDR);
166     for (tpp = addr_list; *tpp; tpp++) {
167 	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
168 	if (state->flags & CLEANUP_FLAG_MAP_OK) {
169 	    if (cleanup_send_canon_maps
170 		&& (cleanup_send_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
171 		did_rewrite |=
172 		    cleanup_map11_tree(state, *tpp, cleanup_send_canon_maps,
173 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
174 	    if (cleanup_comm_canon_maps
175 		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
176 		did_rewrite |=
177 		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
178 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
179 	    if (cleanup_masq_domains
180 		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_FROM))
181 		did_rewrite |=
182 		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
183 	}
184     }
185     if (did_rewrite) {
186 	vstring_truncate(header_buf, strlen(hdr_opts->name));
187 	vstring_strcat(header_buf, ": ");
188 	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
189     }
190     myfree((void *) addr_list);
191     tok822_free_tree(tree);
192     if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
193 	if (did_rewrite)
194 	    cleanup_fold_header(state, header_buf);
195 	else
196 	    cleanup_out_header(state, header_buf);
197     }
198 }
199 
200 /* cleanup_rewrite_recip - recipient address rewriting */
201 
cleanup_rewrite_recip(CLEANUP_STATE * state,const HEADER_OPTS * hdr_opts,VSTRING * header_buf)202 static void cleanup_rewrite_recip(CLEANUP_STATE *state,
203 				          const HEADER_OPTS *hdr_opts,
204 				          VSTRING *header_buf)
205 {
206     TOK822 *tree;
207     TOK822 **addr_list;
208     TOK822 **tpp;
209     int     did_rewrite = 0;
210 
211     if (msg_verbose)
212 	msg_info("rewrite_recip: %s", hdr_opts->name);
213 
214     /*
215      * Parse the header line, rewrite each address found, and regenerate the
216      * header line. Finally, pipe the result through the header line folding
217      * routine.
218      */
219     tree = tok822_parse_limit(vstring_str(header_buf)
220 			      + strlen(hdr_opts->name) + 1,
221 			      var_token_limit);
222     addr_list = tok822_grep(tree, TOK822_ADDR);
223     for (tpp = addr_list; *tpp; tpp++) {
224 	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
225 	if (state->flags & CLEANUP_FLAG_MAP_OK) {
226 	    if (cleanup_rcpt_canon_maps
227 		&& (cleanup_rcpt_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
228 		did_rewrite |=
229 		    cleanup_map11_tree(state, *tpp, cleanup_rcpt_canon_maps,
230 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
231 	    if (cleanup_comm_canon_maps
232 		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
233 		did_rewrite |=
234 		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
235 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
236 	    if (cleanup_masq_domains
237 		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_RCPT))
238 		did_rewrite |=
239 		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
240 	}
241     }
242     if (did_rewrite) {
243 	vstring_truncate(header_buf, strlen(hdr_opts->name));
244 	vstring_strcat(header_buf, ": ");
245 	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
246     }
247     myfree((void *) addr_list);
248     tok822_free_tree(tree);
249     if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
250 	if (did_rewrite)
251 	    cleanup_fold_header(state, header_buf);
252 	else
253 	    cleanup_out_header(state, header_buf);
254     }
255 }
256 
257 /* cleanup_act_log - log action with context */
258 
cleanup_act_log(CLEANUP_STATE * state,const char * action,const char * class,const char * content,const char * text)259 static void cleanup_act_log(CLEANUP_STATE *state,
260 			            const char *action, const char *class,
261 			            const char *content, const char *text)
262 {
263     const char *attr;
264 
265     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
266 	attr = "unknown";
267     vstring_sprintf(state->temp1, "%s: %s: %s %.200s from %s;",
268 		    state->queue_id, action, class, content, attr);
269     if (state->sender)
270 	vstring_sprintf_append(state->temp1, " from=<%s>",
271 			       info_log_addr_form_sender(state->sender));
272     if (state->recip)
273 	vstring_sprintf_append(state->temp1, " to=<%s>",
274 			       info_log_addr_form_recipient(state->recip));
275     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_PROTO_NAME)) != 0)
276 	vstring_sprintf_append(state->temp1, " proto=%s", attr);
277     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_HELO_NAME)) != 0)
278 	vstring_sprintf_append(state->temp1, " helo=<%s>", attr);
279     if (text && *text)
280 	vstring_sprintf_append(state->temp1, ": %s", text);
281     msg_info("%s", vstring_str(state->temp1));
282 }
283 
284 #define CLEANUP_ACT_CTXT_HEADER	"header"
285 #define CLEANUP_ACT_CTXT_BODY	"body"
286 #define CLEANUP_ACT_CTXT_ANY	"content"
287 
288 /* cleanup_act - act upon a header/body match */
289 
cleanup_act(CLEANUP_STATE * state,char * context,const char * buf,const char * value,const char * map_class)290 static const char *cleanup_act(CLEANUP_STATE *state, char *context,
291 			               const char *buf, const char *value,
292 			               const char *map_class)
293 {
294     const char *optional_text = value + strcspn(value, " \t");
295     int     command_len = optional_text - value;
296 
297 #ifdef DELAY_ACTION
298     int     defer_delay;
299 
300 #endif
301 
302     while (*optional_text && ISSPACE(*optional_text))
303 	optional_text++;
304 
305 #define STREQUAL(x,y,l) (strncasecmp((x), (y), (l)) == 0 && (y)[l] == 0)
306 #define CLEANUP_ACT_DROP 0
307 
308     /*
309      * CLEANUP_STAT_CONT and CLEANUP_STAT_DEFER both update the reason
310      * attribute, but CLEANUP_STAT_DEFER takes precedence. It terminates
311      * queue record processing, and prevents bounces from being sent.
312      */
313     if (STREQUAL(value, "REJECT", command_len)) {
314 	const CLEANUP_STAT_DETAIL *detail;
315 
316 	if (state->reason)
317 	    myfree(state->reason);
318 	if (*optional_text) {
319 	    state->reason = dsn_prepend("5.7.1", optional_text);
320 	    if (*state->reason != '4' && *state->reason != '5') {
321 		msg_warn("bad DSN action in %s -- need 4.x.x or 5.x.x",
322 			 optional_text);
323 		*state->reason = '4';
324 	    }
325 	} else {
326 	    detail = cleanup_stat_detail(CLEANUP_STAT_CONT);
327 	    state->reason = dsn_prepend(detail->dsn, detail->text);
328 	}
329 	if (*state->reason == '4')
330 	    state->errs |= CLEANUP_STAT_DEFER;
331 	else
332 	    state->errs |= CLEANUP_STAT_CONT;
333 	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
334 	cleanup_act_log(state, "reject", context, buf, state->reason);
335 	return (buf);
336     }
337     if (STREQUAL(value, "WARN", command_len)) {
338 	cleanup_act_log(state, "warning", context, buf, optional_text);
339 	return (buf);
340     }
341     if (STREQUAL(value, "INFO", command_len)) {
342 	cleanup_act_log(state, "info", context, buf, optional_text);
343 	return (buf);
344     }
345     if (STREQUAL(value, "FILTER", command_len)) {
346 	if (*optional_text == 0) {
347 	    msg_warn("missing FILTER command argument in %s map", map_class);
348 	} else if (strchr(optional_text, ':') == 0) {
349 	    msg_warn("bad FILTER command %s in %s -- "
350 		     "need transport:destination",
351 		     optional_text, map_class);
352 	} else {
353 	    if (state->filter)
354 		myfree(state->filter);
355 	    state->filter = mystrdup(optional_text);
356 	    cleanup_act_log(state, "filter", context, buf, optional_text);
357 	}
358 	return (buf);
359     }
360     if (STREQUAL(value, "PASS", command_len)) {
361 	cleanup_act_log(state, "pass", context, buf, optional_text);
362 	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
363 	return (buf);
364     }
365     if (STREQUAL(value, "DISCARD", command_len)) {
366 	cleanup_act_log(state, "discard", context, buf, optional_text);
367 	state->flags |= CLEANUP_FLAG_DISCARD;
368 	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
369 	return (buf);
370     }
371     if (STREQUAL(value, "HOLD", command_len)) {
372 	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
373 	    cleanup_act_log(state, "hold", context, buf, optional_text);
374 	    state->flags |= CLEANUP_FLAG_HOLD;
375 	}
376 	return (buf);
377     }
378 
379     /*
380      * The DELAY feature is disabled because it has too many problems. 1) It
381      * does not work on some remote file systems; 2) mail will be delivered
382      * anyway with "sendmail -q" etc.; 3) while the mail is queued it bogs
383      * down the deferred queue scan with huge amounts of useless disk I/O
384      * operations.
385      */
386 #ifdef DELAY_ACTION
387     if (STREQUAL(value, "DELAY", command_len)) {
388 	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
389 	    if (*optional_text == 0) {
390 		msg_warn("missing DELAY argument in %s map", map_class);
391 	    } else if (conv_time(optional_text, &defer_delay, 's') == 0) {
392 		msg_warn("ignoring bad DELAY argument %s in %s map",
393 			 optional_text, map_class);
394 	    } else {
395 		cleanup_act_log(state, "delay", context, buf, optional_text);
396 		state->defer_delay = defer_delay;
397 	    }
398 	}
399 	return (buf);
400     }
401 #endif
402     if (STREQUAL(value, "PREPEND", command_len)) {
403 	if (*optional_text == 0) {
404 	    msg_warn("PREPEND action without text in %s map", map_class);
405 	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0) {
406 	    if (!is_header(optional_text)) {
407 		msg_warn("bad PREPEND header text \"%s\" in %s map -- "
408 			 "need \"headername: headervalue\"",
409 			 optional_text, map_class);
410 	    }
411 
412 	    /*
413 	     * By design, cleanup_out_header() may modify content. Play safe
414 	     * and prepare for future developments.
415 	     */
416 	    else {
417 		VSTRING *temp;
418 
419 		cleanup_act_log(state, "prepend", context, buf, optional_text);
420 		temp = vstring_strcpy(vstring_alloc(strlen(optional_text)),
421 				      optional_text);
422 		cleanup_out_header(state, temp);
423 		vstring_free(temp);
424 	    }
425 	} else {
426 	    cleanup_act_log(state, "prepend", context, buf, optional_text);
427 	    cleanup_out_string(state, REC_TYPE_NORM, optional_text);
428 	}
429 	return (buf);
430     }
431     if (STREQUAL(value, "REPLACE", command_len)) {
432 	if (*optional_text == 0) {
433 	    msg_warn("REPLACE action without text in %s map", map_class);
434 	    return (buf);
435 	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0
436 		   && !is_header(optional_text)) {
437 	    msg_warn("bad REPLACE header text \"%s\" in %s map -- "
438 		     "need \"headername: headervalue\"",
439 		     optional_text, map_class);
440 	    return (buf);
441 	} else {
442 	    cleanup_act_log(state, "replace", context, buf, optional_text);
443 	    return (mystrdup(optional_text));
444 	}
445     }
446     if (STREQUAL(value, "REDIRECT", command_len)) {
447 	if (strchr(optional_text, '@') == 0) {
448 	    msg_warn("bad REDIRECT target \"%s\" in %s map -- "
449 		     "need user@domain",
450 		     optional_text, map_class);
451 	} else {
452 	    if (state->redirect)
453 		myfree(state->redirect);
454 	    state->redirect = mystrdup(optional_text);
455 	    cleanup_act_log(state, "redirect", context, buf, optional_text);
456 	    state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
457 	}
458 	return (buf);
459     }
460     if (STREQUAL(value, "BCC", command_len)) {
461 	if (strchr(optional_text, '@') == 0) {
462 	    msg_warn("bad BCC address \"%s\" in %s map -- "
463 		     "need user@domain",
464 		     optional_text, map_class);
465 	} else {
466 	    if (state->hbc_rcpt == 0)
467 		state->hbc_rcpt = argv_alloc(1);
468 	    argv_add(state->hbc_rcpt, optional_text, (char *) 0);
469 	    cleanup_act_log(state, "bcc", context, buf, optional_text);
470 	}
471 	return (buf);
472     }
473     if (STREQUAL(value, "STRIP", command_len)) {
474 	cleanup_act_log(state, "strip", context, buf, optional_text);
475 	return (CLEANUP_ACT_DROP);
476     }
477     /* Allow and ignore optional text after the action. */
478 
479     if (STREQUAL(value, "IGNORE", command_len))
480 	return (CLEANUP_ACT_DROP);
481 
482     if (STREQUAL(value, "DUNNO", command_len))	/* preferred */
483 	return (buf);
484 
485     if (STREQUAL(value, "OK", command_len))	/* compat */
486 	return (buf);
487 
488     msg_warn("unknown command in %s map: %s", map_class, value);
489     return (buf);
490 }
491 
492 /* cleanup_header_callback - process one complete header line */
493 
cleanup_header_callback(void * context,int header_class,const HEADER_OPTS * hdr_opts,VSTRING * header_buf,off_t unused_offset)494 static void cleanup_header_callback(void *context, int header_class,
495 				            const HEADER_OPTS *hdr_opts,
496 				            VSTRING *header_buf,
497 				            off_t unused_offset)
498 {
499     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
500     const char *myname = "cleanup_header_callback";
501     char   *hdrval;
502     struct code_map {
503 	const char *name;
504 	const char *encoding;
505     };
506     static struct code_map code_map[] = {	/* RFC 2045 */
507 	"7bit", MAIL_ATTR_ENC_7BIT,
508 	"8bit", MAIL_ATTR_ENC_8BIT,
509 	"binary", MAIL_ATTR_ENC_8BIT,	/* XXX Violation */
510 	"quoted-printable", MAIL_ATTR_ENC_7BIT,
511 	"base64", MAIL_ATTR_ENC_7BIT,
512 	0,
513     };
514     struct code_map *cmp;
515     MAPS   *checks;
516     const char *map_class;
517 
518     if (msg_verbose)
519 	msg_info("%s: '%.200s'", myname, vstring_str(header_buf));
520 
521     /*
522      * Crude header filtering. This stops malware that isn't sophisticated
523      * enough to use fancy header encodings.
524      */
525 #define CHECK(class, maps, var_name) \
526 	(header_class == class && (map_class = var_name, checks = maps) != 0)
527 
528     if (hdr_opts && (hdr_opts->flags & HDR_OPT_MIME))
529 	header_class = MIME_HDR_MULTIPART;
530 
531     /* Update the Received: header count before maybe dropping headers below. */
532     if (hdr_opts && hdr_opts->type == HDR_RECEIVED)
533 	state->hop_count += 1;
534 
535     if ((state->flags & CLEANUP_FLAG_FILTER)
536 	&& (CHECK(MIME_HDR_PRIMARY, cleanup_header_checks, VAR_HEADER_CHECKS)
537     || CHECK(MIME_HDR_MULTIPART, cleanup_mimehdr_checks, VAR_MIMEHDR_CHECKS)
538     || CHECK(MIME_HDR_NESTED, cleanup_nesthdr_checks, VAR_NESTHDR_CHECKS))) {
539 	char   *header = vstring_str(header_buf);
540 	const char *value;
541 
542 	if ((value = maps_find(checks, header, 0)) != 0) {
543 	    const char *result;
544 
545 	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_HEADER,
546 				      header, value, map_class))
547 		== CLEANUP_ACT_DROP) {
548 		return;
549 	    } else if (result != header) {
550 		vstring_strcpy(header_buf, result);
551 		hdr_opts = header_opts_find(result);
552 		myfree((void *) result);
553 	    }
554 	} else if (checks->error) {
555 	    msg_warn("%s: %s map lookup problem -- "
556 		     "message not accepted, try again later",
557 		     state->queue_id, checks->title);
558 	    state->errs |= CLEANUP_STAT_WRITE;
559 	}
560     }
561 
562     /*
563      * If this is an "unknown" header, just copy it to the output without
564      * even bothering to fold long lines. cleanup_out() will split long
565      * headers that do not fit a REC_TYPE_NORM record.
566      */
567     if (hdr_opts == 0) {
568 	cleanup_out_header(state, header_buf);
569 	return;
570     }
571 
572     /*
573      * Allow 8-bit type info to override 7-bit type info. XXX Should reuse
574      * the effort that went into MIME header parsing.
575      */
576     hdrval = vstring_str(header_buf) + strlen(hdr_opts->name) + 1;
577     while (ISSPACE(*hdrval))
578 	hdrval++;
579     /* trimblanks(hdrval, 0)[0] = 0; */
580     if (var_auto_8bit_enc_hdr
581 	&& hdr_opts->type == HDR_CONTENT_TRANSFER_ENCODING) {
582 	for (cmp = code_map; cmp->name != 0; cmp++) {
583 	    if (strcasecmp(hdrval, cmp->name) == 0) {
584 		if (strcasecmp(cmp->encoding, MAIL_ATTR_ENC_8BIT) == 0)
585 		    nvtable_update(state->attr, MAIL_ATTR_ENCODING,
586 				   cmp->encoding);
587 		break;
588 	    }
589 	}
590     }
591 
592     /*
593      * Copy attachment etc. header blocks without further inspection.
594      */
595     if (header_class != MIME_HDR_PRIMARY) {
596 	cleanup_out_header(state, header_buf);
597 	return;
598     }
599 
600     /*
601      * Known header. Remember that we have seen at least one. Find out what
602      * we should do with this header: delete, count, rewrite. Note that we
603      * should examine headers even when they will be deleted from the output,
604      * because the addresses in those headers might be needed elsewhere.
605      *
606      * XXX 2821: Return-path breakage.
607      *
608      * RFC 821 specifies: When the receiver-SMTP makes the "final delivery" of a
609      * message it inserts at the beginning of the mail data a return path
610      * line.  The return path line preserves the information in the
611      * <reverse-path> from the MAIL command.  Here, final delivery means the
612      * message leaves the SMTP world.  Normally, this would mean it has been
613      * delivered to the destination user, but in some cases it may be further
614      * processed and transmitted by another mail system.
615      *
616      * And that is what Postfix implements. Delivery agents prepend
617      * Return-Path:. In order to avoid cluttering up the message with
618      * possibly inconsistent Return-Path: information (the sender can change
619      * as the result of mail forwarding or mailing list delivery), Postfix
620      * removes any existing Return-Path: headers.
621      *
622      * RFC 2821 Section 4.4 specifies:    A message-originating SMTP system
623      * SHOULD NOT send a message that already contains a Return-path header.
624      * SMTP servers performing a relay function MUST NOT inspect the message
625      * data, and especially not to the extent needed to determine if
626      * Return-path headers are present. SMTP servers making final delivery
627      * MAY remove Return-path headers before adding their own.
628      */
629     else {
630 	state->headers_seen |= (1 << hdr_opts->type);
631 	if (hdr_opts->type == HDR_MESSAGE_ID)
632 	    msg_info("%s: message-id=%s", state->queue_id, hdrval);
633 	if (hdr_opts->type == HDR_RESENT_MESSAGE_ID)
634 	    msg_info("%s: resent-message-id=%s", state->queue_id, hdrval);
635 	if (hdr_opts->type == HDR_RECEIVED) {
636 	    if (state->hop_count >= var_hopcount_limit) {
637 		msg_warn("%s: message rejected: hopcount exceeded",
638 			 state->queue_id);
639 		state->errs |= CLEANUP_STAT_HOPS;
640 	    }
641 	    /* Save our Received: header after maybe updating headers above. */
642 	    if (state->hop_count == 1)
643 		argv_add(state->auto_hdrs, vstring_str(header_buf), ARGV_END);
644 	}
645 	if (CLEANUP_OUT_OK(state)) {
646 	    if (hdr_opts->flags & HDR_OPT_RR)
647 		state->resent = "Resent-";
648 	    if ((hdr_opts->flags & HDR_OPT_SENDER)
649 		&& state->hdr_rewrite_context) {
650 		cleanup_rewrite_sender(state, hdr_opts, header_buf);
651 	    } else if ((hdr_opts->flags & HDR_OPT_RECIP)
652 		       && state->hdr_rewrite_context) {
653 		cleanup_rewrite_recip(state, hdr_opts, header_buf);
654 	    } else if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
655 		cleanup_out_header(state, header_buf);
656 	    }
657 	}
658     }
659 }
660 
661 /* cleanup_header_done_callback - insert missing message headers */
662 
cleanup_header_done_callback(void * context)663 static void cleanup_header_done_callback(void *context)
664 {
665     const char *myname = "cleanup_header_done_callback";
666     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
667     char    time_stamp[1024];		/* XXX locale dependent? */
668     struct tm *tp;
669     TOK822 *token;
670     TOK822 *dummy_token;
671     time_t  tv;
672 
673     /*
674      * XXX Workaround: when we reach the end of headers, mime_state_update()
675      * may execute up to three call-backs before returning to the caller:
676      * head_out(), head_end(), and body_out() or body_end(). As long as
677      * call-backs don't return a result, each call-back has to check for
678      * itself if the previous call-back experienced a problem.
679      */
680     if (CLEANUP_OUT_OK(state) == 0)
681 	return;
682 
683     /*
684      * Future proofing: the Milter client's header suppression algorithm
685      * assumes that the MTA prepends its own Received: header. This
686      * assumption may be violated after some source-code update. The
687      * following check ensures consistency, at least for local submission.
688      */
689     if (state->hop_count < 1) {
690 	msg_warn("%s: message rejected: no Received: header",
691 		 state->queue_id);
692 	state->errs |= CLEANUP_STAT_BAD;
693 	return;
694     }
695 
696     /*
697      * Add a missing (Resent-)Message-Id: header. The message ID gives the
698      * time in GMT units, plus the local queue ID.
699      *
700      * XXX Message-Id is not a required message header (RFC 822 and RFC 2822).
701      *
702      * XXX It is the queue ID non-inode bits that prevent messages from getting
703      * the same Message-Id within the same second.
704      *
705      * XXX An arbitrary amount of time may pass between the start of the mail
706      * transaction and the creation of a queue file. Since we guarantee queue
707      * ID uniqueness only within a second, we must ensure that the time in
708      * the message ID matches the queue ID creation time, as long as we use
709      * the queue ID in the message ID.
710      *
711      * XXX We log a dummy name=value record so that we (hopefully) don't break
712      * compatibility with existing logfile analyzers, and so that we don't
713      * complicate future code that wants to log more name=value attributes.
714      */
715     if ((state->hdr_rewrite_context || var_always_add_hdrs)
716 	&& (state->headers_seen & (1 << (state->resent[0] ?
717 			   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID))) == 0) {
718 	if (var_long_queue_ids) {
719 	    vstring_sprintf(state->temp1, "%s@%s",
720 			    state->queue_id, var_myhostname);
721 	} else {
722 	    tv = state->handle->ctime.tv_sec;
723 	    tp = gmtime(&tv);
724 	    strftime(time_stamp, sizeof(time_stamp), "%Y%m%d%H%M%S", tp);
725 	    vstring_sprintf(state->temp1, "%s.%s@%s",
726 			    time_stamp, state->queue_id, var_myhostname);
727 	}
728 	cleanup_out_format(state, REC_TYPE_NORM, "%sMessage-Id: <%s>",
729 			   state->resent, vstring_str(state->temp1));
730 	msg_info("%s: %smessage-id=<%s>",
731 		 state->queue_id, *state->resent ? "resent-" : "",
732 		 vstring_str(state->temp1));
733 	state->headers_seen |= (1 << (state->resent[0] ?
734 				   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID));
735     }
736     if ((state->headers_seen & (1 << HDR_MESSAGE_ID)) == 0)
737 	msg_info("%s: message-id=<>", state->queue_id);
738 
739     /*
740      * Add a missing (Resent-)Date: header. The date is in local time units,
741      * with the GMT offset at the end.
742      */
743     if ((state->hdr_rewrite_context || var_always_add_hdrs)
744 	&& (state->headers_seen & (1 << (state->resent[0] ?
745 				       HDR_RESENT_DATE : HDR_DATE))) == 0) {
746 	cleanup_out_format(state, REC_TYPE_NORM, "%sDate: %s",
747 		      state->resent, mail_date(state->arrival_time.tv_sec));
748     }
749 
750     /*
751      * Add a missing (Resent-)From: header.
752      */
753     if ((state->hdr_rewrite_context || var_always_add_hdrs)
754 	&& (state->headers_seen & (1 << (state->resent[0] ?
755 				       HDR_RESENT_FROM : HDR_FROM))) == 0) {
756 	quote_822_local(state->temp1, *state->sender ?
757 			state->sender : MAIL_ADDR_MAIL_DAEMON);
758 	if (*state->sender && state->fullname && *state->fullname) {
759 	    char   *cp;
760 
761 	    /* Enforce some sanity on full name content. */
762 	    while ((cp = strchr(state->fullname, '\r')) != 0
763 		   || (cp = strchr(state->fullname, '\n')) != 0)
764 		*cp = ' ';
765 
766 	    /*
767 	     * "From: phrase <route-addr>". Quote the phrase if it contains
768 	     * specials or the "%!" legacy address operators.
769 	     */
770 	    if (cleanup_hfrom_format == HFROM_FORMAT_CODE_STD) {
771 		vstring_sprintf(state->temp2, "%sFrom: ", state->resent);
772 		if (state->fullname[strcspn(state->fullname,
773 					    "%!" LEX_822_SPECIALS)] == 0) {
774 		    /* Normalize whitespace. */
775 		    token = tok822_scan_limit(state->fullname, &dummy_token,
776 					      var_token_limit);
777 		} else {
778 		    token = tok822_alloc(TOK822_QSTRING, state->fullname);
779 		}
780 		if (token) {
781 		    tok822_externalize(state->temp2, token, TOK822_STR_NONE);
782 		    tok822_free(token);
783 		    vstring_strcat(state->temp2, " ");
784 		}
785 		vstring_sprintf_append(state->temp2, "<%s>",
786 				       vstring_str(state->temp1));
787 	    }
788 
789 	    /*
790 	     * "From: addr-spec (ctext)". This is the obsolete form.
791 	     */
792 	    else {
793 		vstring_sprintf(state->temp2, "%sFrom: %s ",
794 				state->resent, vstring_str(state->temp1));
795 		vstring_sprintf(state->temp1, "(%s)", state->fullname);
796 		token = tok822_parse(vstring_str(state->temp1));
797 		tok822_externalize(state->temp2, token, TOK822_STR_NONE);
798 		tok822_free_tree(token);
799 	    }
800 	}
801 
802 	/*
803 	 * "From: addr-spec". This is the form in the absence of full name
804 	 * information, also used for mail from mailer-daemon.
805 	 */
806 	else {
807 	    vstring_sprintf(state->temp2, "%sFrom: %s",
808 			    state->resent, vstring_str(state->temp1));
809 	}
810 	CLEANUP_OUT_BUF(state, REC_TYPE_NORM, state->temp2);
811     }
812 
813     /*
814      * XXX 2821: Appendix B: The return address in the MAIL command SHOULD,
815      * if possible, be derived from the system's identity for the submitting
816      * (local) user, and the "From:" header field otherwise. If there is a
817      * system identity available, it SHOULD also be copied to the Sender
818      * header field if it is different from the address in the From header
819      * field.  (Any Sender field that was already there SHOULD be removed.)
820      * Similar wording appears in RFC 2822 section 3.6.2.
821      *
822      * Postfix presently does not insert a Sender: header if envelope and From:
823      * address differ. Older Postfix versions assumed that the envelope
824      * sender address specifies the system identity and inserted Sender:
825      * whenever envelope and From: differed. This was wrong with relayed
826      * mail, and was often not even desirable with original submissions.
827      *
828      * XXX 2822 Section 3.6.2, as well as RFC 822 Section 4.1: FROM headers can
829      * contain multiple addresses. If this is the case, then a Sender: header
830      * must be provided with a single address.
831      *
832      * Postfix does not count the number of addresses in a From: header
833      * (although doing so is trivial, once the address is parsed).
834      */
835 
836     /*
837      * Add a missing destination header.
838      */
839 #define VISIBLE_RCPT	((1 << HDR_TO) | (1 << HDR_RESENT_TO) \
840 			| (1 << HDR_CC) | (1 << HDR_RESENT_CC))
841 
842     if ((state->hdr_rewrite_context || var_always_add_hdrs)
843 	&& (state->headers_seen & VISIBLE_RCPT) == 0 && *var_rcpt_witheld) {
844 	if (!is_header(var_rcpt_witheld)) {
845 	    msg_warn("bad %s header text \"%s\" -- "
846 		     "need \"headername: headervalue\"",
847 		     VAR_RCPT_WITHELD, var_rcpt_witheld);
848 	} else {
849 	    cleanup_out_format(state, REC_TYPE_NORM, "%s", var_rcpt_witheld);
850 	}
851     }
852 
853     /*
854      * Place a dummy PTR record right after the last header so that we can
855      * append headers without having to worry about clobbering the
856      * end-of-content marker.
857      */
858     if (state->milters || cleanup_milters) {
859 	if ((state->append_hdr_pt_offset = vstream_ftell(state->dst)) < 0)
860 	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
861 	cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
862 	if ((state->append_hdr_pt_target = vstream_ftell(state->dst)) < 0)
863 	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
864 	state->body_offset = state->append_hdr_pt_target;
865     }
866 }
867 
868 /* cleanup_body_callback - output one body record */
869 
cleanup_body_callback(void * context,int type,const char * buf,ssize_t len,off_t offset)870 static void cleanup_body_callback(void *context, int type,
871 				          const char *buf, ssize_t len,
872 				          off_t offset)
873 {
874     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
875 
876     /*
877      * XXX Workaround: when we reach the end of headers, mime_state_update()
878      * may execute up to three call-backs before returning to the caller:
879      * head_out(), head_end(), and body_out() or body_end(). As long as
880      * call-backs don't return a result, each call-back has to check for
881      * itself if the previous call-back experienced a problem.
882      */
883     if (CLEANUP_OUT_OK(state) == 0)
884 	return;
885 
886     /*
887      * Crude message body content filter for emergencies. This code has
888      * several problems: it sees one line at a time; it looks at long lines
889      * only in chunks of line_length_limit (2048) characters; it is easily
890      * bypassed with encodings and other tricks.
891      */
892     if ((state->flags & CLEANUP_FLAG_FILTER)
893 	&& cleanup_body_checks
894 	&& (var_body_check_len == 0 || offset < var_body_check_len)) {
895 	const char *value;
896 
897 	if ((value = maps_find(cleanup_body_checks, buf, 0)) != 0) {
898 	    const char *result;
899 
900 	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_BODY,
901 				      buf, value, VAR_BODY_CHECKS))
902 		== CLEANUP_ACT_DROP) {
903 		return;
904 	    } else if (result != buf) {
905 		cleanup_out(state, type, result, strlen(result));
906 		myfree((void *) result);
907 		return;
908 	    }
909 	} else if (cleanup_body_checks->error) {
910 	    msg_warn("%s: %s map lookup problem -- "
911 		     "message not accepted, try again later",
912 		     state->queue_id, cleanup_body_checks->title);
913 	    state->errs |= CLEANUP_STAT_WRITE;
914 	}
915     }
916     cleanup_out(state, type, buf, len);
917 }
918 
919 /* cleanup_message_headerbody - process message content, header and body */
920 
static void cleanup_message_headerbody(CLEANUP_STATE *state, int type,
				               const char *buf, ssize_t len)
{
    const char *myname = "cleanup_message_headerbody";
    const MIME_STATE_DETAIL *detail;
    const unsigned char *bytes;
    char   *out_start;
    char   *out;
    ssize_t pos;

    /*
     * Process one message content record: apply the optional character
     * reject and strip filters, then feed text records and the
     * end-of-content record to the MIME processor. Any other record type is
     * flagged as bad input.
     */

    /*
     * Reject unwanted characters. The first byte that is a member of the
     * "reject" set triggers a REJECT action and ends processing of this
     * record.
     *
     * XXX Possible optimization: simplify the loop when the "reject" set
     * contains only one character.
     */
    if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_reject_chars) {
	bytes = (const unsigned char *) buf;
	for (pos = 0; pos < len; pos++) {
	    if (memchr(vstring_str(cleanup_reject_chars), bytes[pos],
		       VSTRING_LEN(cleanup_reject_chars)) == 0)
		continue;
	    cleanup_act(state, CLEANUP_ACT_CTXT_ANY,
			buf, "REJECT disallowed character",
			"character reject");
	    return;
	}
    }

    /*
     * Strip unwanted characters. The input is left alone: the surviving
     * bytes are copied into state->stripped_buf, and from here on "buf" and
     * "len" refer to that null-terminated copy.
     *
     * XXX Possible space+time optimization: use a bitset.
     *
     * XXX Possible optimization: simplify the loop when the "strip" set
     * contains only one character.
     *
     * XXX Possible optimization: copy the input only if we really have to.
     */
    if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_strip_chars) {
	VSTRING_RESET(state->stripped_buf);
	VSTRING_SPACE(state->stripped_buf, len + 1);
	out_start = out = vstring_str(state->stripped_buf);
	bytes = (const unsigned char *) buf;
	for (pos = 0; pos < len; pos++) {
	    if (memchr(vstring_str(cleanup_strip_chars), bytes[pos],
		       VSTRING_LEN(cleanup_strip_chars)) != 0)
		continue;
	    *out++ = buf[pos];
	}
	*out = 0;
	len = out - out_start;
	buf = out_start;
    }

    /*
     * This should never happen: only text records and the end-of-content
     * record are expected here.
     */
    if (type != REC_TYPE_NORM && type != REC_TYPE_CONT
	&& type != REC_TYPE_XTRA) {
	msg_warn("%s: message rejected: "
	      "unexpected record type %d in message content", myname, type);
	state->errs |= CLEANUP_STAT_BAD;
	return;
    }

    /*
     * Hand the record to the MIME processor. For a text record, that is all
     * there is to do.
     */
    state->mime_errs = mime_state_update(state->mime_state, type, buf, len);
    if (type != REC_TYPE_XTRA)
	return;

    /*
     * We have reached the end of the message content segment. Record the
     * current file position so that the message size can be computed later
     * on.
     */
    if (state->milters || cleanup_milters) {
	/* Make room for body modification. */
	cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
    }

    /* Ignore header truncation after primary message headers. */
    state->mime_errs &= ~MIME_ERR_TRUNC_HEADER;

    /* Report a remaining MIME error, unless a reason was already recorded. */
    if (state->mime_errs != 0 && state->reason == 0) {
	state->errs |= CLEANUP_STAT_CONT;
	detail = mime_state_detail(state->mime_errs);
	state->reason = dsn_prepend(detail->dsn, detail->text);
    }
    state->mime_state = mime_state_free(state->mime_state);

    state->xtra_offset = vstream_ftell(state->dst);
    if (state->xtra_offset < 0)
	msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
    state->cont_length = state->xtra_offset - state->data_offset;

    /* Later records go to the extracted segment handler. */
    state->action = cleanup_extracted;
}
1011 
1012 /* cleanup_mime_error_callback - error report call-back routine */
1013 
cleanup_mime_error_callback(void * context,int err_code,const char * text,ssize_t len)1014 static void cleanup_mime_error_callback(void *context, int err_code,
1015 				              const char *text, ssize_t len)
1016 {
1017     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
1018     const char *origin;
1019 
1020     /*
1021      * Message header too large errors are handled after the end of the
1022      * primary message headers.
1023      */
1024     if ((err_code & ~MIME_ERR_TRUNC_HEADER) != 0) {
1025 	if ((origin = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
1026 	    origin = MAIL_ATTR_ORG_NONE;
1027 #define TEXT_LEN (len < 100 ? (int) len : 100)
1028 	msg_info("%s: reject: mime-error %s: %.*s from %s; from=<%s> to=<%s>",
1029 		 state->queue_id, mime_state_error(err_code), TEXT_LEN, text,
1030 		 origin, info_log_addr_form_sender(state->sender),
1031 		 info_log_addr_form_recipient(state->recip ?
1032 					      state->recip : "unknown"));
1033     }
1034 }
1035 
1036 /* cleanup_message - initialize message content segment */
1037 
void    cleanup_message(CLEANUP_STATE *state, int type, const char *buf, ssize_t len)
{
    const char *myname = "cleanup_message";
    int     mime_options = 0;
    int     strict_hdrs;
    int     strict_body;

    /*
     * Start the message content segment: write the segment marker, create
     * the MIME processor with the configured options, install
     * cleanup_message_headerbody() as state->action, and pass the current
     * record ("type", "buf", "len") on to it.
     */

    /*
     * Write the start-of-content segment marker, and remember the file
     * position that follows it.
     */
    cleanup_out_string(state, REC_TYPE_MESG, "");
    state->data_offset = vstream_ftell(state->dst);
    if (state->data_offset < 0)
	msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);

    /*
     * Set up MIME processing options, if any. MIME_OPT_DISABLE_MIME disables
     * special processing of Content-Type: headers, and thus, causes all text
     * after the primary headers to be treated as the message body.
     * 
     * The content check options are requested only with CLEANUP_FLAG_FILTER,
     * that is, they are turned off if bouncing or forwarding mail.
     */
    if (var_disable_mime_input) {
	mime_options |= MIME_OPT_DISABLE_MIME;
    } else if (state->flags & CLEANUP_FLAG_FILTER) {
	strict_hdrs = (var_strict_8bitmime || var_strict_7bit_hdrs);
	strict_body = (var_strict_8bitmime || var_strict_8bit_body);
	if (strict_hdrs)
	    mime_options |= MIME_OPT_REPORT_8BIT_IN_HEADER;
	if (strict_body)
	    mime_options |= MIME_OPT_REPORT_8BIT_IN_7BIT_BODY;
	if (var_strict_encoding)
	    mime_options |= MIME_OPT_REPORT_ENCODING_DOMAIN;
	/* Any strictness setting or non-empty checks setting enables this. */
	if (strict_hdrs || strict_body || var_strict_encoding
	    || *var_header_checks || *var_mimehdr_checks
	    || *var_nesthdr_checks)
	    mime_options |= MIME_OPT_REPORT_NESTING;
    }
    state->mime_state = mime_state_alloc(mime_options,
					 cleanup_header_callback,
					 cleanup_header_done_callback,
					 cleanup_body_callback,
					 (MIME_STATE_ANY_END) 0,
					 cleanup_mime_error_callback,
					 (void *) state);

    /*
     * XXX Workaround: truncate a long message header so that we don't exceed
     * the default Sendmail libmilter request size limit of 65535. Note that
     * this lowers var_header_limit itself.
     */
#define KLUDGE_HEADER_LIMIT	60000
    if (var_header_limit > KLUDGE_HEADER_LIMIT
	&& (cleanup_milters || state->milters))
	var_header_limit = KLUDGE_HEADER_LIMIT;

    /*
     * Pass control to the header processing routine.
     */
    state->action = cleanup_message_headerbody;
    cleanup_message_headerbody(state, type, buf, len);
}
1097