1package Sisimai::RFC5322; 2use feature ':5.10'; 3use strict; 4use warnings; 5use constant HEADERTABLE => { 6 'messageid' => ['message-id'], 7 'subject' => ['subject'], 8 'listid' => ['list-id'], 9 'date' => [qw|date posted-date posted resent-date|], 10 'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|], 11 'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|], 12}; 13 14# Regular expression of valid RFC-5322 email address(<addr-spec>) 15my $Re = { 'rfc5322' => undef, 'ignored' => undef, 'domain' => undef, }; 16BUILD_REGULAR_EXPRESSIONS: { 17 # See http://www.ietf.org/rfc/rfc5322.txt 18 # or http://www.ex-parrot.com/pdw/Mail-RFC822-Address.html ... 19 # addr-spec = local-part "@" domain 20 # local-part = dot-atom / quoted-string / obs-local-part 21 # domain = dot-atom / domain-literal / obs-domain 22 # domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] 23 # dcontent = dtext / quoted-pair 24 # dtext = NO-WS-CTL / ; Non white space controls 25 # %d33-90 / ; The rest of the US-ASCII 26 # %d94-126 ; characters not including "[", 27 # ; "]", or "\" 28 my $atom = qr;[a-zA-Z0-9_!#\$\%&'*+/=?\^`{}~|\-]+;o; 29 my $quoted_string = qr/"(?:\\[^\r\n]|[^\\"])*"/o; 30 my $domain_literal = qr/\[(?:\\[\x01-\x09\x0B-\x0c\x0e-\x7f]|[\x21-\x5a\x5e-\x7e])*\]/o; 31 my $dot_atom = qr/$atom(?:[.]$atom)*/o; 32 my $local_part = qr/(?:$dot_atom|$quoted_string)/o; 33 my $domain = qr/(?:$dot_atom|$domain_literal)/o; 34 35 $Re->{'rfc5322'} = qr/\A$local_part[@]$domain\z/o; 36 $Re->{'ignored'} = qr/\A$local_part[.]*[@]$domain\z/o; 37 $Re->{'domain'} = qr/\A$domain\z/o; 38} 39 40my $HEADERINDEX = {}; 41BUILD_FLATTEN_RFC822HEADER_LIST: { 42 # Convert $HEADER: hash reference to flatten hash reference for being 43 # called from Sisimai::Lhost::* 44 for my $v ( values %{ HEADERTABLE() } ) { 45 $HEADERINDEX->{ $_ } = 1 for @$v; 46 } 47} 48 49sub HEADERFIELDS { 50 # Grouped RFC822 headers 51 # @param [String] group RFC822 Header group name 52 # @return [Array,Hash] RFC822 Header list 53 my $class = shift; 54 my $group = shift || return $HEADERINDEX; 55 return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group }; 56 return HEADERTABLE; 57} 58 59sub LONGFIELDS { 60 # Fields that might be long 61 # @return [Hash] Long filed(email header) list 62 return { 'to' => 1, 'from' => 1, 'subject' => 1, 'message-id' => 1 }; 63} 64 65sub is_emailaddress { 66 # Check that the argument is an email address or not 67 # @param [String] email Email address string 68 # @return [Integer] 0: Not email address 69 # 1: Email address 70 my $class = shift; 71 my $email = shift // return 0; 72 73 return 0 if $email =~ /(?:[\x00-\x1f]|\x1f)/; 74 return 0 if length $email > 254; 75 return 1 if $email =~ $Re->{'ignored'}; 76 return 0; 77} 78 79sub is_mailerdaemon { 80 # Check that the argument is mailer-daemon or not 81 # @param [String] email Email address 82 # @return [Integer] 0: Not mailer-daemon 83 # 1: Mailer-daemon 84 my $class = shift; 85 my $email = shift // return 0; 86 state $match = qr{(?> 87 (?:mailer-daemon|postmaster)[@] 88 |[<(](?:mailer-daemon|postmaster)[)>] 89 |\A(?:mailer-daemon|postmaster)\z 90 |[ ]?mailer-daemon[ ] 91 ) 92 }x; 93 return 1 if lc($email) =~ $match; 94 return 0; 95} 96 97sub received { 98 # Convert Received headers to a structured data 99 # @param [String] argv1 Received header 100 # @return [Array] Received header as a structured data 101 my $class = shift; 102 my $argv1 = shift || return []; 103 my $hosts = []; 104 my $value = { 'from' => '', 'by' => '' }; 105 106 # Received: (qmail 10000 invoked by uid 999); 24 Apr 2013 00:00:00 +0900 107 return [] if $argv1 =~ /qmail[ \t]+.+invoked[ \t]+/; 108 109 if( $argv1 =~ /\Afrom[ \t]+(.+)[ \t]+by[ \t]+([^ ]+)/ ) { 110 # Received: from localhost (localhost) 111 # by nijo.example.jp (V8/cf) id s1QB5ma0018057; 112 # Wed, 26 Feb 2014 06:05:48 -0500 113 $value->{'from'} = $1; 114 $value->{'by'} = $2; 115 116 } elsif( $argv1 =~ /\bby[ \t]+([^ ]+)(.+)/ ) { 117 # Received: by 10.70.22.98 with SMTP id c2mr1838265pdf.3; Fri, 18 Jul 2014 118 # 00:31:02 -0700 (PDT) 119 $value->{'from'} = $1.$2; 120 $value->{'by'} = $1; 121 } 122 123 if( $value->{'from'} =~ / / ) { 124 # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222]) 125 # (authenticated bits=0) 126 # by nijo.example.jp (V8/cf) with ESMTP id s1QB5ka0018055; 127 # Wed, 26 Feb 2014 06:05:47 -0500 128 my @received = split(' ', $value->{'from'}); 129 my @namelist; 130 my @addrlist; 131 my $hostname = ''; 132 my $hostaddr = ''; 133 134 for my $e ( @received ) { 135 # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222]) 136 if( $e =~ /\A[(\[]\d+[.]\d+[.]\d+[.]\d+[)\]]\z/ ) { 137 # [192.0.2.1] or (192.0.2.1) 138 $e =~ y/[]()//d; 139 push @addrlist, $e; 140 141 } else { 142 # hostname 143 $e =~ y/()//d; 144 push @namelist, $e; 145 } 146 } 147 148 for my $e ( @namelist ) { 149 # 1. Hostname takes priority over all other IP addresses 150 next unless rindex($e, '.') > -1; 151 $hostname = $e; 152 last; 153 } 154 155 unless( $hostname ) { 156 # 2. Use IP address as a remote host name 157 for my $e ( @addrlist ) { 158 # Skip if the address is a private address 159 next if index($e, '10.') == 0; 160 next if index($e, '127.') == 0; 161 next if index($e, '192.168.') == 0; 162 next if $e =~ /\A172[.](?:1[6-9]|2[0-9]|3[0-1])[.]/; 163 $hostaddr = $e; 164 last; 165 } 166 } 167 $value->{'from'} = $hostname || $hostaddr || $addrlist[-1]; 168 } 169 170 for my $e ('from', 'by') { 171 # Copy entries into $hosts 172 next unless defined $value->{ $e }; 173 $value->{ $e } =~ y/()[];?//d; 174 push @$hosts, $value->{ $e }; 175 } 176 return $hosts; 177} 178 179sub fillet { 180 # Split given entire message body into error message lines and the original 181 # message part only include email headers 182 # @param [String] mbody Entire message body 183 # @param [Regexp] regex Regular expression of the message/rfc822 or the 184 # beginning of the original message part 185 # @return [Array] [Error message lines, The original message] 186 # @since v4.25.5 187 my $class = shift; 188 my $mbody = shift || return undef; 189 my $regex = shift || return undef; 190 191 my ($a, $b) = split($regex, $$mbody, 2); $b ||= ''; 192 if( length $b ) { 193 # Remove blank lines, the message body of the original message, and 194 # append "\n" at the end of the original message headers 195 # 1. Remove leading blank lines 196 # 2. Remove text after the first blank line: \n\n 197 # 3. Append "\n" at the end of test block when the last character is not "\n" 198 $b =~ s/\A[\r\n\s]+//m; 199 substr($b, index($b, "\n\n") + 1, length($b), '') if index($b, "\n\n") > 0; 200 $b .= "\n" unless $b =~ /\n\z/; 201 } 202 return [$a, $b]; 203} 204 2051; 206__END__ 207 208=encoding utf-8 209 210=head1 NAME 211 212Sisimai::RFC5322 - Email address related utilities 213 214=head1 SYNOPSIS 215 216 use Sisimai::RFC5322; 217 218 print Sisimai::RFC5322->is_emailaddress('neko@example.jp'); # 1 219 print Sisimai::RFC5322->is_domainpart('example.jp'); # 1 220 print Sisimai::RFC5322->is_mailerdaemon('neko@example.jp'); # 0 221 222=head1 DESCRIPTION 223 224Sisimai::RFC5322 provide methods for checking email address. 225 226=head1 CLASS METHODS 227 228=head2 C<B<is_emailaddress(I<email address>)>> 229 230C<is_emailaddress()> checks the argument is valid email address or not. 231 232 print Sisimai::RFC5322->is_emailaddress('neko@example.jp'); # 1 233 print Sisimai::RFC5322->is_emailaddress('neko%example.jp'); # 0 234 235 my $addr_with_name = [ 236 'Stray cat <neko@example.jp', 237 '=?UTF-8?B?55m954yr?= <shironeko@example.co.jp>', 238 ]; 239 for my $e ( @$addr_with_name ) { 240 print Sisimai::RFC5322->is_emailaddress($e); # 1 241 } 242 243=head2 C<B<is_domainpart(I<Domain>)>> 244 245C<is_domainpart()> checks the argument is valid domain part of an email address 246or not. 247 248 print Sisimai::RFC5322->is_domainpart('neko@example.jp'); # 0 249 print Sisimai::RFC5322->is_domainpart('neko.example.jp'); # 1 250 251=head2 C<B<is_domainpart(I<Domain>)>> 252 253C<is_mailerdaemon()> checks the argument is mailer-daemon or not. 254 255 print Sisimai::RFC5322->is_mailerdaemon('neko@example.jp'); # 0 256 print Sisimai::RFC5322->is_mailerdaemon('mailer-daemon@example.jp'); # 1 257 258=head2 C<B<received(I<String>)>> 259 260C<received()> returns array reference which include host names in the Received 261header. 262 263 my $v = 'from mx.example.org (c1.example.net [192.0.2.1]) by mx.example.jp'; 264 my $r = Sisimai::RFC5322->received($v); 265 266 warn Dumper $r; 267 $VAR1 = [ 268 'mx.example.org', 269 'mx.example.jp' 270 ]; 271 272=head2 C<B<fillet(I<String>, I<RegExp>)>> 273 274C<fillet()> returns array reference which include error message lines of given 275message body and the original message part split by the 2nd argument. 276 277 my $v = 'Error message here 278 Content-Type: message/rfc822 279 Return-Path: <neko@libsisimai.org>'; 280 my $r = Sisimai::RFC5322->fillet(\$v, qr|^Content-Type:[ ]message/rfc822|m); 281 282 warn Dumper $r; 283 $VAR1 = [ 284 'Error message here', 285 'Return-Path: <neko@libsisimai.org>'; 286 ]; 287 288=head1 AUTHOR 289 290azumakuniyuki 291 292=head1 COPYRIGHT 293 294Copyright (C) 2014-2021 azumakuniyuki, All rights reserved. 295 296=head1 LICENSE 297 298This software is distributed under The BSD 2-Clause License. 299 300=cut 301