1package Sisimai::RFC5322;
2use feature ':5.10';
3use strict;
4use warnings;
5use constant HEADERTABLE => {
6    'messageid' => ['message-id'],
7    'subject'   => ['subject'],
8    'listid'    => ['list-id'],
9    'date'      => [qw|date posted-date posted resent-date|],
10    'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|],
11    'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|],
12};
13
14# Regular expression of valid RFC-5322 email address(<addr-spec>)
15my $Re = { 'rfc5322' => undef, 'ignored' => undef, 'domain' => undef, };
16BUILD_REGULAR_EXPRESSIONS: {
17    # See http://www.ietf.org/rfc/rfc5322.txt
18    #  or http://www.ex-parrot.com/pdw/Mail-RFC822-Address.html ...
19    #   addr-spec       = local-part "@" domain
20    #   local-part      = dot-atom / quoted-string / obs-local-part
21    #   domain          = dot-atom / domain-literal / obs-domain
22    #   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
23    #   dcontent        = dtext / quoted-pair
24    #   dtext           = NO-WS-CTL /     ; Non white space controls
25    #                     %d33-90 /       ; The rest of the US-ASCII
26    #                     %d94-126        ;  characters not including "[",
27    #                                     ;  "]", or "\"
28    my $atom           = qr;[a-zA-Z0-9_!#\$\%&'*+/=?\^`{}~|\-]+;o;
29    my $quoted_string  = qr/"(?:\\[^\r\n]|[^\\"])*"/o;
30    my $domain_literal = qr/\[(?:\\[\x01-\x09\x0B-\x0c\x0e-\x7f]|[\x21-\x5a\x5e-\x7e])*\]/o;
31    my $dot_atom       = qr/$atom(?:[.]$atom)*/o;
32    my $local_part     = qr/(?:$dot_atom|$quoted_string)/o;
33    my $domain         = qr/(?:$dot_atom|$domain_literal)/o;
34
35    $Re->{'rfc5322'} = qr/\A$local_part[@]$domain\z/o;
36    $Re->{'ignored'} = qr/\A$local_part[.]*[@]$domain\z/o;
37    $Re->{'domain'}  = qr/\A$domain\z/o;
38}
39
40my $HEADERINDEX = {};
41BUILD_FLATTEN_RFC822HEADER_LIST: {
42    # Convert $HEADER: hash reference to flatten hash reference for being
43    # called from Sisimai::Lhost::*
44    for my $v ( values %{ HEADERTABLE() } ) {
45        $HEADERINDEX->{ $_ } = 1 for @$v;
46    }
47}
48
49sub HEADERFIELDS {
50    # Grouped RFC822 headers
51    # @param    [String] group  RFC822 Header group name
52    # @return   [Array,Hash]    RFC822 Header list
53    my $class = shift;
54    my $group = shift || return $HEADERINDEX;
55    return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group };
56    return HEADERTABLE;
57}
58
59sub LONGFIELDS {
60    # Fields that might be long
61    # @return   [Hash] Long filed(email header) list
62    return { 'to' => 1, 'from' => 1, 'subject' => 1, 'message-id' => 1 };
63}
64
65sub is_emailaddress {
66    # Check that the argument is an email address or not
67    # @param    [String] email  Email address string
68    # @return   [Integer]       0: Not email address
69    #                           1: Email address
70    my $class = shift;
71    my $email = shift // return 0;
72
73    return 0 if $email =~ /(?:[\x00-\x1f]|\x1f)/;
74    return 0 if length $email > 254;
75    return 1 if $email =~ $Re->{'ignored'};
76    return 0;
77}
78
79sub is_mailerdaemon {
80    # Check that the argument is mailer-daemon or not
81    # @param    [String] email  Email address
82    # @return   [Integer]       0: Not mailer-daemon
83    #                           1: Mailer-daemon
84    my $class = shift;
85    my $email = shift // return 0;
86    state $match = qr{(?>
87         (?:mailer-daemon|postmaster)[@]
88        |[<(](?:mailer-daemon|postmaster)[)>]
89        |\A(?:mailer-daemon|postmaster)\z
90        |[ ]?mailer-daemon[ ]
91        )
92    }x;
93    return 1 if lc($email) =~ $match;
94    return 0;
95}
96
97sub received {
98    # Convert Received headers to a structured data
99    # @param    [String] argv1  Received header
100    # @return   [Array]         Received header as a structured data
101    my $class = shift;
102    my $argv1 = shift || return [];
103    my $hosts = [];
104    my $value = { 'from' => '', 'by'   => '' };
105
106    # Received: (qmail 10000 invoked by uid 999); 24 Apr 2013 00:00:00 +0900
107    return [] if $argv1 =~ /qmail[ \t]+.+invoked[ \t]+/;
108
109    if( $argv1 =~ /\Afrom[ \t]+(.+)[ \t]+by[ \t]+([^ ]+)/ ) {
110        # Received: from localhost (localhost)
111        #   by nijo.example.jp (V8/cf) id s1QB5ma0018057;
112        #   Wed, 26 Feb 2014 06:05:48 -0500
113        $value->{'from'} = $1;
114        $value->{'by'}   = $2;
115
116    } elsif( $argv1 =~ /\bby[ \t]+([^ ]+)(.+)/ ) {
117        # Received: by 10.70.22.98 with SMTP id c2mr1838265pdf.3; Fri, 18 Jul 2014
118        #   00:31:02 -0700 (PDT)
119        $value->{'from'} = $1.$2;
120        $value->{'by'}   = $1;
121    }
122
123    if( $value->{'from'} =~ / / ) {
124        # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
125        #   (authenticated bits=0)
126        #   by nijo.example.jp (V8/cf) with ESMTP id s1QB5ka0018055;
127        #   Wed, 26 Feb 2014 06:05:47 -0500
128        my @received = split(' ', $value->{'from'});
129        my @namelist;
130        my @addrlist;
131        my $hostname = '';
132        my $hostaddr = '';
133
134        for my $e ( @received ) {
135            # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
136            if( $e =~ /\A[(\[]\d+[.]\d+[.]\d+[.]\d+[)\]]\z/ ) {
137                # [192.0.2.1] or (192.0.2.1)
138                $e =~ y/[]()//d;
139                push @addrlist, $e;
140
141            } else {
142                # hostname
143                $e =~ y/()//d;
144                push @namelist, $e;
145            }
146        }
147
148        for my $e ( @namelist ) {
149            # 1. Hostname takes priority over all other IP addresses
150            next unless rindex($e, '.') > -1;
151            $hostname = $e;
152            last;
153        }
154
155        unless( $hostname ) {
156            # 2. Use IP address as a remote host name
157            for my $e ( @addrlist ) {
158                # Skip if the address is a private address
159                next if index($e, '10.') == 0;
160                next if index($e, '127.') == 0;
161                next if index($e, '192.168.') == 0;
162                next if $e =~ /\A172[.](?:1[6-9]|2[0-9]|3[0-1])[.]/;
163                $hostaddr = $e;
164                last;
165            }
166        }
167        $value->{'from'} = $hostname || $hostaddr || $addrlist[-1];
168    }
169
170    for my $e ('from', 'by') {
171        # Copy entries into $hosts
172        next unless defined $value->{ $e };
173        $value->{ $e } =~ y/()[];?//d;
174        push @$hosts, $value->{ $e };
175    }
176    return $hosts;
177}
178
179sub fillet {
180    # Split given entire message body into error message lines and the original
181    # message part only include email headers
182    # @param    [String] mbody  Entire message body
183    # @param    [Regexp] regex  Regular expression of the message/rfc822 or the
184    #                           beginning of the original message part
185    # @return   [Array]         [Error message lines, The original message]
186    # @since    v4.25.5
187    my $class = shift;
188    my $mbody = shift || return undef;
189    my $regex = shift || return undef;
190
191    my ($a, $b) = split($regex, $$mbody, 2); $b ||= '';
192    if( length $b ) {
193        # Remove blank lines, the message body of the original message, and
194        # append "\n" at the end of the original message headers
195        # 1. Remove leading blank lines
196        # 2. Remove text after the first blank line: \n\n
197        # 3. Append "\n" at the end of test block when the last character is not "\n"
198        $b =~ s/\A[\r\n\s]+//m;
199        substr($b, index($b, "\n\n") + 1, length($b), '') if index($b, "\n\n") > 0;
200        $b .= "\n" unless $b =~ /\n\z/;
201    }
202    return [$a, $b];
203}
204
2051;
206__END__
207
208=encoding utf-8
209
210=head1 NAME
211
212Sisimai::RFC5322 - Email address related utilities
213
214=head1 SYNOPSIS
215
216    use Sisimai::RFC5322;
217
218    print Sisimai::RFC5322->is_emailaddress('neko@example.jp');    # 1
219    print Sisimai::RFC5322->is_domainpart('example.jp');           # 1
220    print Sisimai::RFC5322->is_mailerdaemon('neko@example.jp');    # 0
221
222=head1 DESCRIPTION
223
224Sisimai::RFC5322 provide methods for checking email address.
225
226=head1 CLASS METHODS
227
228=head2 C<B<is_emailaddress(I<email address>)>>
229
230C<is_emailaddress()> checks the argument is valid email address or not.
231
232    print Sisimai::RFC5322->is_emailaddress('neko@example.jp');  # 1
233    print Sisimai::RFC5322->is_emailaddress('neko%example.jp');  # 0
234
235    my $addr_with_name = [
236        'Stray cat <neko@example.jp',
237        '=?UTF-8?B?55m954yr?= <shironeko@example.co.jp>',
238    ];
239    for my $e ( @$addr_with_name ) {
240        print Sisimai::RFC5322->is_emailaddress($e); # 1
241    }
242
243=head2 C<B<is_domainpart(I<Domain>)>>
244
245C<is_domainpart()> checks the argument is valid domain part of an email address
246or not.
247
248    print Sisimai::RFC5322->is_domainpart('neko@example.jp');  # 0
249    print Sisimai::RFC5322->is_domainpart('neko.example.jp');  # 1
250
251=head2 C<B<is_domainpart(I<Domain>)>>
252
253C<is_mailerdaemon()> checks the argument is mailer-daemon or not.
254
255    print Sisimai::RFC5322->is_mailerdaemon('neko@example.jp');          # 0
256    print Sisimai::RFC5322->is_mailerdaemon('mailer-daemon@example.jp'); # 1
257
258=head2 C<B<received(I<String>)>>
259
260C<received()> returns array reference which include host names in the Received
261header.
262
263    my $v = 'from mx.example.org (c1.example.net [192.0.2.1]) by mx.example.jp';
264    my $r = Sisimai::RFC5322->received($v);
265
266    warn Dumper $r;
267    $VAR1 = [
268        'mx.example.org',
269        'mx.example.jp'
270    ];
271
272=head2 C<B<fillet(I<String>, I<RegExp>)>>
273
274C<fillet()> returns array reference which include error message lines of given
275message body and the original message part split by the 2nd argument.
276
277    my $v = 'Error message here
278    Content-Type: message/rfc822
279    Return-Path: <neko@libsisimai.org>';
280    my $r = Sisimai::RFC5322->fillet(\$v, qr|^Content-Type:[ ]message/rfc822|m);
281
282    warn Dumper $r;
283    $VAR1 = [
284        'Error message here',
285        'Return-Path: <neko@libsisimai.org>';
286    ];
287
288=head1 AUTHOR
289
290azumakuniyuki
291
292=head1 COPYRIGHT
293
294Copyright (C) 2014-2021 azumakuniyuki, All rights reserved.
295
296=head1 LICENSE
297
298This software is distributed under The BSD 2-Clause License.
299
300=cut
301