1;# $Id$ 2;# 3;# Copyright (c) 1990-2006, Raphael Manfredi 4;# 5;# You may redistribute only under the terms of the Artistic License, 6;# as specified in the README file that comes with the distribution. 7;# You may reuse parts of this distribution only within the terms of 8;# that same Artistic License; a copy of which may be found at the root 9;# of the source tree for mailagent 3.0. 10;# 11;# $Log: parse.pl,v $ 12;# Revision 3.0.1.16 2001/03/17 18:13:15 ram 13;# patch72: use the "domain" config var instead of mydomain 14;# 15;# Revision 3.0.1.15 2001/03/13 13:15:43 ram 16;# patch71: added fix for broken continuations in parse_mail() 17;# 18;# Revision 3.0.1.14 2001/01/10 16:55:56 ram 19;# patch69: allow direct IP numbers in Received fields 20;# 21;# Revision 3.0.1.13 1999/07/12 13:53:30 ram 22;# patch66: weird Received: logging moved to higher levels 23;# 24;# Revision 3.0.1.12 1998/07/28 17:04:44 ram 25;# patch62: become even more knowledgeable about Received lines 26;# 27;# Revision 3.0.1.11 1998/03/31 15:25:16 ram 28;# patch59: when "tofake" is turned off, disable faking of To: 29;# patch59: allow for missing "host1" in the Received: line parsing 30;# 31;# Revision 3.0.1.10 1997/09/15 15:16:00 ram 32;# patch57: improved Received: line parsing logic 33;# 34;# Revision 3.0.1.9 1997/02/20 11:45:34 ram 35;# patch55: improved Received: header parsing 36;# 37;# Revision 3.0.1.8 1997/01/07 18:33:09 ram 38;# patch52: now pre-extend memory by using existing message size 39;# patch52: enhanced Received: lines parsing 40;# 41;# Revision 3.0.1.7 1996/12/24 14:57:30 ram 42;# patch45: new relay_list() routine to parse Received lines 43;# patch45: now creates two pseudo headers: Envelope and Relayed 44;# 45;# Revision 3.0.1.6 1995/03/21 12:57:06 ram 46;# patch35: now allows spaces between header field name and the ':' delimiter 47;# 48;# Revision 3.0.1.5 1995/02/16 14:35:15 ram 49;# patch32: new routines header_prepend and header_append 50;# patch32: can now fake a missing From: line in header 51;# 52;# Revision 3.0.1.4 1995/01/25 15:27:08 ram 53;# patch27: ported to perl 5.0 PL0 54;# 55;# Revision 3.0.1.3 1994/09/22 14:33:38 ram 56;# patch12: builtins handled in &run_builtins to allow re-entrance 57;# 58;# Revision 3.0.1.2 1994/07/01 15:04:02 ram 59;# patch8: now systematically escape leading From if fromall is ON 60;# 61;# Revision 3.0.1.1 1994/04/25 15:18:14 ram 62;# patch7: global fix for From line escapes to make them configurable 63;# 64;# Revision 3.0 1993/11/29 13:49:05 ram 65;# Baseline for mailagent 3.0 netwide release. 66;# 67;# 68# 69# Parsing mail 70# 71 72# Parse the mail and fill-in the Header associative array. The special entries 73# All, Body and Head respectively hold the whole message, the body and the 74# header of the message. 75sub parse_mail { 76 local($file_name) = shift(@_); # Where mail is stored ("" for stdin) 77 local($head_only) = shift(@_); # Optional parameter: parse only header 78 local($last_header) = ""; # Name of last header (for continuations) 79 local($first_from) = ""; # The first From line in mails 80 local($lines) = 0; # Number of lines in the body 81 local($length) = 0; # Length of body, in bytes 82 local($last_was_nl) = 1; # True when last line was a '\n' (1 for EOH) 83 local($fd) = STDIN; # Where does the mail come from ? 84 local($field, $value); # Field and value for current line 85 local($_); 86 local($preext) = 0; 87 local($added) = 0; 88 local($curlen) = 0; 89 undef %Header; # Reset the whole structure holding message 90 91 if ($file_name ne '') { # Mail spooled in a file 92 unless(open(MAIL, $file_name)) { 93 &add_log("ERROR cannot open $file_name: $!"); 94 return; 95 } 96 $fd = MAIL; 97 $preext = -s MAIL; 98 } 99 $Userpath = ""; # Reset path from possible previous @PATH 100 101 # Pre-extend 'All', 'Body' and 'Head' 102 if ($preext <= 0) { 103 $preext = 100_000; 104 &add_log("preext uses fixed value ($preext)") if $loglvl > 19; 105 } else { 106 &add_log("preext uses file size ($preext)") if $loglvl > 19; 107 } 108 $preext += 500; # Extra room for From --> >From, etc... 109 110 $Header{'All'} = ' ' x $preext; 111 $Header{'Body'} = ' ' x $preext; 112 $Header{'Head'} = ' ' x 500; 113 $Header{'All'} = ''; 114 $Header{'Body'} = ''; 115 $Header{'Head'} = ''; 116 117 &add_log ("parsing mail" . ($head_only ? " header" : "")) if $loglvl > 18; 118 while (<$fd>) { 119 $added += length($_); 120 121 # If string extension goes beyond the pre-allocated space, re-extend 122 # by a big amount instead of letting perl realloc space. 123 if ($added > $preext) { 124 $curlen = length($Header{'All'}); 125 &add_log ("extended after $curlen bytes") if $loglvl > 19; 126 $Header{'All'} .= ' ' x $preext; 127 substr($Header{'All'}, $curlen) = ''; 128 $curlen = length($Header{'Body'}); 129 $Header{'Body'} .= ' ' x $preext; 130 substr($Header{'Body'}, $curlen) = ''; 131 $added = $added - $preext; 132 } 133 134 $Header{'All'} .= $_; 135 if (1../^$/) { # EOH is a blank line 136 next if /^$/; # Skip EOH marker 137 chop; 138 139 if (/^\s/) { # It is a continuation line 140 my $val = $_; 141 $val =~ s/^\s+/ /; # Swallow multiple spaces 142 $Header{$last_header} .= $val if $last_header ne ''; 143 &add_log("WARNING bad continuation in header, line $.") 144 if $last_header eq '' && $loglvl > 4; 145 } elsif (($field, $value) = /^([!-9;-~\w-]+):\s*(.*)/) { 146 # We found a new header field (i.e. it is not a continuation). 147 # Guarantee only one From: header line. If multiple From: are 148 # found, keep the last one. 149 # Multiple headers like 'Received' are separated by a new- 150 # line character. All headers end on a non new-line. 151 # Case is normalized before recording, so apparently-to will 152 # be recorded as Apparently-To but header is not changed. 153 $last_header = &header'normalize($field); # Normalize case 154 if ($last_header eq 'From' && defined $Header{$last_header}) { 155 $Header{$last_header} = $value; 156 &add_log("WARNING duplicate From in header, line $.") 157 if $loglvl > 4; 158 } elsif ($Header{$last_header} ne '') { 159 $Header{$last_header} .= "\n" . $value; 160 } else { 161 $Header{$last_header} .= $value; 162 } 163 } elsif (/^From\s+(\S+)/) { # The very first From line 164 $first_from = $1; 165 } else { 166 # Did not identify a header field nor a continuation 167 # Maybe there was a wrong header split somewhere? 168 # If we did not encounter a header yet, we're seeing garbage. 169 if ($last_header eq '') { 170 &add_log("ERROR ignoring header garbage, line $.: $_") 171 if $loglvl > 1; 172 next; # Skip insertion to 'Head' 173 } else { 174 &add_log("WARNING ". 175 "faking continuation for $last_header, line $." 176 ) if $loglvl > 4; 177 $_ = " " . $_; # Patch line for 'Head' 178 $Header{$last_header} .= $_; 179 } 180 } 181 182 $Header{'Head'} .= $_ . "\n"; # Record line in header 183 184 } else { 185 last if $head_only; # Stop parsing if only header wanted 186 $lines++; # One more line in body 187 $length += length($_); # Update length of message 188 # Protect potentially dangerous lines when asked to do so 189 # From could normally be mis-interpreted only after a blank line, 190 # but some "broken" User Agents also look for them everywhere... 191 # That's where fromall must be set to ON to escape all of them. 192 s/^From(\s)/>From$1/ if $last_was_nl && $cf'fromesc =~ /on/i; 193 $last_was_nl = /^$/ || $cf'fromall =~ /on/i; 194 $Header{'Body'} .= $_; 195 } 196 } 197 close MAIL if $file_name ne ''; 198 &header_prepend("$FAKE_FROM\n") unless $first_from; 199 &body_check unless $head_only; 200 &header_check($first_from, $lines); # Sanity checks 201} 202 203# Parse given header string into the supplied hash ref. 204# Do that silently if told to do so via $silent. 205# Returns: the value of the first From line, and fills %$href. 206sub header_parse { 207 my ($headers, $href, $silent) = @_; 208 # There is some code duplication with parse_mail() above 209 local($first_from); # First From line records sender 210 local($last_header); # Current normalized header field 211 local($value); # Value of current field 212 my $missing_warned = 0; 213 foreach (split(/\n/, $headers)) { 214 if (/^\s/) { # It is a continuation line 215 s/^\s+/ /; # Swallow multiple spaces 216 $href->{$last_header} .= $_ if $last_header ne ''; 217 } elsif (/^([!-9;-~\w-]+):\s*(.*)/) { # We found a new header 218 $value = $2; # Bug in perl 4.0 PL19 219 $last_header = &header'normalize($1); 220 $missing_warned = 0; 221 # Multiple headers like 'Received' are separated by a new- 222 # line character. All headers end on a non new-line. 223 if ($href->{$last_header} ne '') { 224 $href->{$last_header} .= "\n$value"; 225 } else { 226 $href->{$last_header} .= $value; 227 } 228 } elsif (/^From\s+(\S+)/) { # The very first From line 229 $first_from = $1; 230 } else { 231 # Did not identify a header field nor a continuation 232 # Maybe there was a wrong header split somewhere? 233 if ($last_header eq '') { 234 &add_log("ERROR ignoring leading header garbage: $_") 235 if $loglvl > 1 && !$silent; 236 } else { 237 &add_log("ERROR missing continuation for $last_header: $_") 238 if !$missing_warned && $loglvl > 1 && !$silent; 239 $href->{$last_header} .= " " . $_; 240 $missing_warned++; 241 } 242 } 243 } 244 return $first_from; 245} 246 247# Compute amount of lines listed in the header 248# We do NOT use $Header{'Lines'} here since this is a filtering value which 249# represents the number of lines in the *decoded* body, not the physical 250# number of lines in the message which the Lines header in the message is 251# supposed to represent. 252sub header_lines { 253 my ($lines) = $Header{'Head'} =~ /^Lines:\s*(\d+)/im; 254 return $lines; 255} 256 257# Set number of Lines in body and body Length to reflect reality 258# If the headers were physically present in the message, they are 259# updated as well. 260sub header_update_size { 261 # Cannot trust %Header to indicate whether the headers were present 262 # since we add these entries in any case... Use a crude way to detect 263 # presence then... 264 my $had_lines = $Header{'Head'} =~ /^Lines:/im; 265 my $had_length = $Header{'Head'} =~ /^Length:/im; 266 267 my $lines = $Header{'Body'} =~ tr/\n/\n/; 268 my $length = length($Header{'Body'}); 269 my $is_mime = exists $Header{'Mime-Version'}; 270 271 if ($had_lines && $lines != &header_lines) { 272 alter_header("Lines", $HD_STRIP); 273 header_append(header'format("Lines: $lines\n")); 274 } 275 276 # For filtering, use the *decoded* body! 277 $Header{'Lines'} = ${$Header{'=Body='}} =~ tr/\n/\n/; 278 $Header{'Length'} = length ${$Header{'=Body='}}; 279 280 if ($had_length) { 281 alter_header("Length", $HD_STRIP); 282 &add_log("NOTICE stripped non-RFC822 Length header") if $loglvl > 5; 283 } 284 285 if ($is_mime && exists $Header{'Content-Length'}) { 286 my $clen = $Header{'Content-Length'}; 287 if ($clen != $length) { 288 alter_header("Content-Length", $HD_STRIP); 289 header_append(header'format("Content-Length: $length\n")); 290 $Header{'Content-Length'} = $length; 291 &add_log("NOTICE adjusted Content-Length from $clen to $length") 292 if $loglvl > 5; 293 } 294 } 295 296 if (!$is_mime && exists $Header{'Content-Length'}) { 297 alter_header("Content-Length", $HD_STRIP); 298 delete $Header{'Content-Length'}; 299 &add_log("NOTICE stripped Content-Length header in non-MIME message") 300 if $loglvl > 5; 301 } 302} 303 304# Check whether the body we got back has received a transfer encoding. 305# If it has and we know about that transfer encoding, decode it. 306# We make sure the "=Body=" header key is a reference to the decoded body: 307# it is either a reference to $Header{'Body'} when we leave it as-is, or 308# a reference to a newly allocated scalar. 309sub body_check { 310 $Header{'=Body='} = \$Header{'Body'}; 311 my $encoding = lc($Header{'Content-Transfer-Encoding'}); 312 my %decode = map { $_ => 1 } qw(base64 quoted-printable); 313 unless (exists $Header{'Mime-Version'}) { 314 return unless length $encoding; 315 if ($decode{$encoding}) { 316 &add_log("WARNING ignoring $encoding body transfer encoding") 317 if $loglvl > 3; 318 } else { 319 alter_header("Content-Transfer-Encoding", $HD_STRIP); 320 delete $Header{'Content-Transfer-Encoding'}; 321 &add_log("NOTICE stripped $encoding encoding in non-MIME message") 322 if $loglvl > 6; 323 } 324 return; 325 } 326 my %enc = map { $_ => 1 } qw(7bit 8bit binary base64 quoted-printable); 327 $encoding =~ s/\s*;$//; # Strip (wrong) spurious trailing separator 328 if (length $encoding) { 329 &'add_log("WARNING unknown content transfer encoding \"$encoding\"") 330 if $'loglvl > 5 && !$enc{$encoding}; 331 } 332 return unless $decode{$encoding}; 333 my @data = split(/\r?\n/, $Header{'Body'}); 334 my $error; 335 my $output; 336 if ($encoding eq "base64") { 337 base64'reset(length $Header{'Body'}); 338 foreach my $d (@data) { 339 base64'decode($d); 340 } 341 $error = base64'error_msg(); 342 $output = base64'output(); 343 } elsif ($encoding eq "quoted-printable") { 344 qp'reset(length $Header{'Body'}); 345 foreach my $d (@data) { 346 qp'decode($d); 347 } 348 $error = qp'error_msg(); 349 $output = qp'output(); 350 } 351 if (length $error) { 352 &'add_log("WARNING could not decode $encoding body: $error") 353 if $'loglvl > 5; 354 } else { 355 if ($'loglvl > 9) { 356 my $len = length $$output; 357 &'add_log("decoded $encoding body into $len bytes"); 358 } 359 $Header{'=Body='} = $output; # Reference 360 } 361 &header_update_size; 362} 363 364# Force recoding of the body to a new encoding. 365# The $Header{'Body'} variable is supposed to hold the decoded version. 366sub body_recode_with { 367 my ($encoding) = @_; 368 $Header{'=Body='} = \$Header{'Body'}; # The decoded version! 369 my @data = split(/\r?\n/, $Header{'Body'}); 370 my $error; 371 my $output; 372 if ($encoding eq "base64") { 373 base64'reset(length($Header{'Body'}) * 4/3); 374 foreach my $d (@data) { 375 base64'encode($d); 376 } 377 $error = base64'error_msg(); 378 $output = base64'output(); 379 } elsif ($encoding eq "quoted-printable") { 380 qp'reset(length $Header{'Body'} * 1.1); 381 foreach my $d (@data) { 382 qp'encode($d); 383 } 384 $error = qp'error_msg(); 385 $output = qp'output(); 386 } 387 if (length $error) { 388 &'add_log("WARNING could not recode $encoding body: $error") 389 if $'loglvl > 5; 390 } else { 391 if ($'loglvl > 9) { 392 my $len = length $$output; 393 &'add_log("recoded $encoding body into $len bytes") if $'loglvl > 7; 394 } 395 delete $Header{'Body'}; # $Header{'=Body='} ref still points to it 396 $Header{'Body'} = $$output; # Transfer-Encoded version of the body 397 # The body changed, must update the "All" key... 398 $Header{'All'} = $Header{'Head'} . "\n" . $Header{'Body'}; 399 &header_update_size; 400 } 401} 402 403# When coming from a feeback routine such as PASS, we have a new body that 404# maybe we need to recode to match the original encoding... 405sub body_recode { 406 $Header{'=Body='} = \$Header{'Body'}; # The decoded version! 407 my $encoding = lc($Header{'Content-Transfer-Encoding'}); 408 return unless length $encoding; 409 unless (exists $Header{'Mime-Version'}) { 410 &add_log("WARNING not recoding body in $encoding: no MIME header") 411 if $loglvl > 3; 412 alter_header("Content-Transfer-Encoding", $HD_STRIP); 413 delete $Header{'Content-Transfer-Encoding'}; 414 return; 415 } 416 my %recode = map { $_ => 1 } qw(base64 quoted-printable); 417 return unless $recode{$encoding}; 418 body_recode_with($encoding); 419} 420 421# When coming back from a FEED, check whether the content transfer encoding 422# is suitable and replace it with the optimal one if not. 423# Upon entry, we expect =Body= to point to the decoded versions and headers 424# of the message to have been parsed in %Header (read: properly resync-ed). 425# Both the header and the body of the message are updated if the encoding 426# is changed. 427# Return TRUE if body was recoded (implying caller should RESYNC the headers). 428sub body_recode_optimally { 429 my $encoding = lc($Header{'Content-Transfer-Encoding'}) || "none"; 430 my $optimal = best_body_encoding($Header{'=Body='}); 431 my %encoded = map { $_ => 1 } qw(base64 quoted-printable); 432 my $recoded = 0; 433 if ($optimal ne $encoding) { 434 &add_log("converting body encoded with $encoding to optimal $optimal") 435 if $'loglvl > 7; 436 if ($encoded{$optimal}) { 437 $Header{'Body'} = ${$Header{'=Body='}}; 438 $Header{'=Body='} = \$Header{'Body'}; # The decoded version! 439 body_recode_with($optimal); 440 } 441 alter_header("Content-Transfer-Encoding", $HD_STRIP); 442 header_append(header'format("Content-Transfer-Encoding: $optimal\n")); 443 $recoded = 1; 444 } 445 return $recoded; 446} 447 448# Whenever we got a new set of headers in $Header{'Head'} we need to ensure 449# the new vision is consistent with the body encoding. If they strip the 450# Content-Transfer-Encoding header for instance, we have to use the old 451# decoded version we had instead of the original body. 452# If they add a Content-Transfer-Encoding header, we have to recode the body! 453sub header_check_body_encoding { 454 my $plain = \$Header{'Body'} == $Header{'=Body='}; # No encoding 455 if ($plain && $Header{'Head'} !~ /^Content-Transfer-Encoding:/mi) { 456 # No encoding and no header indicating a transfer encodig... 457 return; # Nothing to change 458 } 459 my %new; 460 header_parse($Header{'Head'}, \%new, 1); # Silently parse new headers 461 my $encoding = $Header{'Content-Transfer-Encoding'} || "none"; 462 my $new_encoding = lc($new{'Content-Transfer-Encoding'}) || "none"; 463 return if lc($encoding) eq $new_encoding; # No change occurred 464 465 &add_log( 466 "WARNING body transfer encoding changed from $encoding to $new_encoding" 467 ) if $loglvl > 3; 468 469 470 $Header{'Body'} = ${$Header{'=Body='}}; # Restore decoded version 471 my %encode = map { $_ => 1 } qw(base64 quoted-printable); 472 unless ($encode{$new_encoding}) { 473 $Header{'=Body='} = \$Header{'Body'}; 474 return; 475 } 476 body_recode_with($new_encoding); # Then re-encode it 477 478 # At some point a RESYNC will be needed, caller will decide when it is 479 # necessary to do it. 480} 481 482# Now do some sanity checks: 483# - if there is no From: header, fill it in with the first From 484# - if there is no To: but an Apparently-To:, copy it also as a To: 485# - if an Envelope field was defined in the header, override it (sorry) 486# - likewise for Relayed, which is the list of relaying hosts, first one first. 487# 488# We guarantee the following header entries (to select on in rules): 489# Envelope: the actual sender of the message, empty if cannot compute 490# From: the value of the From field 491# To: to whom the mail was sent 492# Lines: number of lines in the message (*decoded* version) 493# Length: number of bytes in the message body (*decoded* version) 494# Relayed: the list of relaying hosts deduced from Received: lines 495# Reply-To: the address we may use to reply 496# Sender: the value of the Sender field, same as From usually 497# 498# NB: When the $lines parameter is set, we parsed the whole message initially. 499# When it is undef, we're resyncing, possibly after an external messaging of 500# the message. 501sub header_check { 502 local($first_from, $lines) = @_; # First From line, number of lines 503 unless (defined $Header{'From'}) { 504 &add_log("WARNING no From: field, assuming $first_from") if $loglvl > 4; 505 $Header{'From'} = $first_from; 506 # Fake a From: header line unless prevented to do so. That way, when 507 # saving in an MH or MMDF folder (where the leading From is stripped), 508 # the user will still be able to identify the source of the message! 509 if ($first_from && $cf'fromfake !~ /^off/i) { 510 &add_log("NOTICE faking a From: header line") if $loglvl > 5; 511 &header_append("From: $first_from\n"); 512 } 513 } 514 515 # There is usually one Apparently-To line per address. Remove all new lines 516 # in the header line and replace them with ','. Likewise for To: and Cc:. 517 # although it is far less likely to occur. 518 foreach $field ('Apparently-To', 'To', 'Cc') { 519 $Header{$field} =~ s/\n/,/gm; # Remove new-lines 520 $Header{$field} =~ s/,$/\n/m; # Restore last new-line 521 } 522 523 # If no To: field, then maybe there is an Apparently-To: instead. If so, 524 # make them identical. Otherwise, assume the mail was directed to the user. 525 # 526 # This changes the way filtering is done, so it's not always a good idea 527 # to do it. Some people may want to explicitely check that there is no 528 # To: line, but if we fake one, they'll never know. So check for tofake, 529 # and if OFF, don't do anything. 530 unless ($cf'tofake =~ /^off/i) { 531 if (!$Header{'To'} && $Header{'Apparently-To'}) { 532 $Header{'To'} = $Header{'Apparently-To'}; 533 } 534 unless ($Header{'To'}) { 535 &add_log("WARNING no To: field, assuming $cf'user") if $loglvl > 4; 536 $Header{'To'} = $cf'user; 537 } 538 } 539 540 # Update length information 541 # No warning is emitted unless $lines was defined, indicating initial 542 # parsing of the message we get. 543 my $length = $Header{'Content-Length'}; 544 &header_update_size; # Update number of lines and length... 545 my $count = &header_lines; 546 &add_log("NOTICE adjusted number of lines from $lines to $count") 547 if $loglvl > 5 && 548 defined($lines) && defined($count) && $count != $lines; 549 $count = $Header{'Content-Length'}; 550 &add_log("NOTICE adjusted Content-Length from $length to $count") 551 if $loglvl > 5 && defined($lines) && $count != $length; 552 553 # If there is no Reply-To: line, then take the address in From, if any. 554 # Otherwise use the address found in the return-path 555 if (!$Header{'Reply-To'}) { 556 local($tmp) = (&parse_address($Header{'From'}))[0]; 557 $Header{'Reply-To'} = $tmp if $tmp ne ''; 558 $Header{'Reply-To'} = (&parse_address($Header{'Return-Path'}))[0] 559 if $tmp eq ''; 560 } 561 562 # Unless there is already a sender line, fake one using From field 563 if (!$Header{'Sender'}) { 564 $Header{'Sender'} = $first_from; 565 $Header{'Sender'} = $Header{'From'} unless $first_from; 566 } 567 568 # Now override any Envelope header and grab it from the first From field 569 # If such a field was defined in the message header, then sorry but it 570 # was a mistake: RFC 822 doesn't define it, so it should have been 571 # an X-Envelope instead. 572 573 $Header{'Envelope'} = $first_from; 574 575 # Finally, compute the list of relaying hosts. The first host which saw 576 # this message comes first, the last one (normally the machine receiving 577 # the mail) coming last. 578 579 unless ($Header{'Relayed'} = &relay_list) { 580 &add_log("NOTICE no valid Received: indication") if $loglvl > 6; 581 } 582} 583 584# Compute the relaying hosts by looking at the Received: lines and parsing 585# them to deduce which host saw and relayed the message. We parse things 586# like this: 587# 588# Received: from host1 (host2 [xx.yy.zz.tt]) by host3 589# Received: from host1 ([xx.yy.zz.tt]) by host3 590# Received: from ?host1? ([xx.yy.zz.tt]) by host3 591# Received: from host1 by host3 592# Received: from (host2 [xx.yy.zz.tt]) by host3 593# Received: from (host1) [xx.yy.zz.tt] by host3 594# Received: from host1 [xx.yy.zz.tt] by host3 595# Received: from host2 [xx.yy.zz.tt] (host1) by host3 596# Received: from (user@host1) by host3 597# 598# The host2, when present, is the reverse DNS mapping of the IP address. 599# It can be different from host1 in case of local /etc/host aliasing for 600# instance. This is used when present, otherwise we must trust host1. 601# The host3 information is never used here. It is possible for host1 to 602# be a simple IP address [xx.yy.zz.tt]. 603# 604# The latest Received: line inserted in the header is the one added by 605# the host receiving the message. For local messages, it may be the 606# only line present. It is the only line for which host3 is used, since 607# it is probable we can trust our local delivery mailer. 608# 609# The returned comma-separated list is sorted to have the first relaying 610# host come first (whilst Received headers are normally prepended, which 611# yields a reverse host chain). 612sub relay_list { 613 local(@received) = split(/\n/, $Header{'Received'}); 614 return '' unless @received; 615 local(@hosts); # List of relaying hosts 616 local($host, $real); 617 local($islast) = 1; # First line we see is the "last" inserted 618 local($received); # Received line, verbatim 619 local($i); 620 local($_); 621 622 # All the known top-level domains as of 2006-08-15 623 # with the addition of "loc", "localdomain" and "private". 624 # See http://data.iana.org/TLD/tlds-alpha-by-domain.txt 625 my $tlds_re = qr/ 626 a(?:ero|rpa|[c-gil-oq-uwxz])| 627 b(?:iz|[abd-jmnorstvwyz])| 628 c(?:at|o(?:m|op)|[acdf-ik-oruvxyz])| 629 d[ejkmoz]| 630 e(?:du|[cegr-u])| 631 f[ijkmor]| 632 g(?:ov|[abd-ilmnp-uwy])| 633 h[kmnrtu]| 634 i(?:n(?:fo|t)|[del-oq-t])| 635 j(?:obs|[emop])| 636 k[eghimnrwyz]| 637 l(?:[abcikr-vy]|o(?:c|caldomain))| 638 m(?:il|obi|useum|[acdghk-z])| 639 n(?:ame|et|[acefgilopruz])| 640 o(?:m|rg)| 641 p(?:r(?:ivate|o)|[ae-hk-nrstwy])| 642 qa| 643 r[eouw]| 644 s[a-eg-ortuvyz]| 645 t(?:ravel|[cdfghj-prtvwz])| 646 u[agkmsyz]| 647 v[aceginu]| 648 w[fs]| 649 y[etu]| 650 z[amw] 651 /ix; 652 653 for ($i = 0; $i < @received; $i++) { 654 $received = $_ = $received[$i]; 655 656 # Handle first Received line (the last one added) specially. 657 if ($islast) { 658 if ( 659 /\bby\s+(\[\d+\.\d+\.\d+\.\d+\])/i || 660 /\bby\s+([\w-.]+)/i 661 ) { 662 $host = $1; 663 $host .= ".$cf::domain" 664 if $host =~ /^\w/ && $host !~ /\.$tlds_re$/; 665 push(@hosts, $host); 666 } else { 667 &add_log("WARNING no by in first Received: line '$received'") 668 if $loglvl > 4; 669 } 670 $islast = 0; 671 } 672 673 next unless s/^\s*from\s+//i; 674 next if s/^by\s+//i; # Host name missing 675 676 # Look for host1, which must be there somehow since we found a 'from' 677 # Some sendmails like to add a leading 'login@' before the address, 678 # so strip that out before being fancy... 679 # The only case host1 was seen to be missing was when it is replaced 680 # by an (host2 [ip]) specification instead. 681 682 s/^\w+\@//; 683 # [xx.yy.zz.tt] 684 if (s/^(\[\d+\.\d+\.\d+\.\d+\])\s*//) { 685 $host = $1; # IP address [xx.yy.zz.tt] 686 } 687 # ?xx.yy.zz.tt? ( [XX.YY.ZZ.TT]) 688 elsif (s/^\?[\d\.]+\?\s*\(\s*(\[\d+\.\d+\.\d+\.\d+\])\s*\)\s*//) { 689 $host = $1; 690 } 691 # foo.domain.com (optional) 692 elsif (s/^([\w-.]+)(\(\S+\))?\s*//) { 693 $host = $1; # host name 694 } 695 # (user@foo.domain.com) 696 elsif (s/^\(\w+\@([\w-.]+)\)\s*//) { 697 $host = $1; # host name 698 } 699 # (foo.domain.com) [xx.yy.zz.tt] 700 # foo.domain.com [xx.yy.zz.tt] 701 elsif (s/^\(?([\w-.]+)\)?\s*\[\d+\.\d+\.\d+\.\d+\]\s*//) { 702 $host = $1; # host name 703 } 704 # Unrecognized, but starting with a parenthesis, hinting for host2... 705 elsif (m/^\(/) { 706 $host = undef; # host1 missing, but host2 should be there 707 } else { 708 &add_log("WARNING invalid from in Received: line '$received'") 709 if $loglvl > 4; 710 next; 711 } 712 713 # There may be an IP or reverse DNS mapping, which will be used to 714 # supersede the current $host if found. Note that some (local) mailers 715 # insert host as login@host, so we remove the login part. 716 # Also handle things like (really foo.com) or (actually real.host), i.e 717 # allow an adjective to qualify the real host name. 718 # 719 # Note: we don't anchor the match at the beginning of the string 720 # since we want to parse the 'user@255.190.143.3' as in: 721 # from foo.net (HELO master.foo.org) (user@255.190.143.3) by bar.net 722 # and it may not come first... Later on, we'll remove all remaining 723 # leading unrecognized () information. 724 # 725 # The cryptic regexps below attempt to recognize things like: 726 # (user@foo.domain.com [xx.yy.zz.tt]) 727 # (WORD user@foo.domain.com [xx.yy.zz.tt]) 728 729 $real = ''; 730 $real = $1 eq '' ? $2 : $1 if 731 s/\(([\w-.@]*)?\s*(\[\d+\.\d+\.\d+\.\d+\])?\)\s*// || 732 s/\(\w+\s+([\w-.@]*)?\s*(\[\d+\.\d+\.\d+\.\d+\])?\)\s*//; 733 $real =~ s/^.*\@//; 734 $real = '' if $real =~ /^[\d.]+$/; # A sendmail version number! 735 736 # Supersede the host name computed in the previous parsing only 737 # if the "real" host name we attempted to guess is an IP address 738 # or looks like a fully qualified domain name. 739 740 $host = $real if $real =~ /\.$tlds_re$/ || $real =~ /^\[[\d.]+\]$/; 741 742 if ($host eq '') { 743 &add_log("NOTICE no relaying origin in Received: line '$received'") 744 if $loglvl > 6; 745 next; 746 } 747 748 # If we have not recognized anything above, then we don't want to 749 # handle anything between () that may follow the original host name. 750 # There are just too many formats out there and we can't definitively 751 # parse them all. There may even be multiple such occurrences like: 752 # from foo.net (HELO master.foo.org) (user@255.190.143.3) by bar.net 753 # Just skip them. 754 755 s/^\([^)]*\)\s+//g; 756 757 # At this point, we should have a 'by ' string somewhere, or an EOS. 758 # We're not checking the 'by' immediately (as in /^by/) because some 759 # mailers like inserting comments such as 'with ESMTP' or 'via xyzt'. 760 # Also, I have seen stange things like 'from xxx from xxx by yyy'. 761 # 762 # Otherwise we have an unknown Received line format. 763 # This is not as bad as not being able to deduce host1 or host2. 764 # The full line is logged, so that we may improve our fuzzy matching 765 # policy. 766 # 767 # Note: the lack of 'by' is only allowed for the first Received line 768 # stacked, i.e. the last one we parse here... 769 770 unless (/\s*by\s+/i || /^\s*$/ || $i == $#received) { 771 &add_log("weird Received: line '$received'") if $loglvl > 8; 772 } 773 774 # Validate the host. It must be either an internet [xx.yy.zz.tt] form, 775 # or a domain name. This also skips things like 'localhost'. We 776 # also accept pure xx.yy.zz.tt (i.e. without surrounding brackets) 777 778 unless ( 779 $host =~ /^\[[\d.]+\]$/ || 780 $host =~ /^[\w-.]+\.$tlds_re$/ || 781 $host =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ 782 ) { 783 next if $host =~ /^[\w-]+$/; # No message for unqualified hosts 784 &add_log("ignoring bad host $host in Received: line '$received'") 785 if $loglvl > 6; 786 next; 787 } 788 789 push(@hosts, $host); 790 } 791 792 # Remove duplicate consecutive hosts in the list, since this is probably 793 # an internal relaying (where we don't have real names but only aliases, 794 # otherwise the message would have looped forever!) and does not bring 795 # us much. 796 797 local($last, $dup); 798 local(@unique) = grep(($dup = $last ne $_, $last = $_, $dup), @hosts); 799 800 return join(', ', reverse @unique); 801} 802 803# Append given field to the header structure, updating the whole mail 804# text at the same time, hence keeping the %Header table. 805# The argument must be a valid formatted RFC-822 mail header field. 806sub header_append { 807 local($hline) = @_; 808 $Header{'Head'} .= $hline; 809 $Header{'All'} = $Header{'Head'} . "\n" . $Header{'Body'}; 810} 811 812# Prepend given field to the whole mail, updating %Header fields accordingly. 813sub header_prepend { 814 local($hline) = @_; 815 $Header{'Head'} = $hline . $Header{'Head'}; 816 $Header{'All'} = $hline . $Header{'All'}; 817} 818 819# Scan the supplied scalar reference (containing a mail body without any 820# content transfer encoding) and determine what is the proper encoding 821# for that body: "7bit", "quoted-printable" or "base64". 822sub best_body_encoding { 823 my ($body) = @_; 824 my $size = 0; 825 my $largest_line = 0; 826 my $qp_escaped = 0; 827 my $non_7bit = 0; 828 829 foreach my $l (split(/\r?\n/, $$body)) { 830 my $len = length($l); 831 $size += $len; 832 $largest_line = $len if $largest_line < $len; 833 $non_7bit += $l =~ tr/[\x80-\xff]/[\x80-\xff]/; 834 $non_7bit += $l =~ tr/[\x0]/[\x0]/; # NUL never allowed in "7bit" 835 $l =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])//g; 836 $qp_escaped = $len - length($l); 837 } 838 839 return "7bit" if $largest_line <= 998 && $non_7bit == 0; 840 841 my $size_qp = $size + 2 * $qp_escaped; 842 my $size_base64 = $size * 4 / 3; 843 844 return "base64" if $size_base64 <= $size_qp; 845 return "quoted-printable" if $qp_escaped * 8 < $size; # Less than 1/8th 846 return "base64"; 847} 848 849