1# Copyright (c) 2015-2018 by Pali <pali@cpan.org> 2 3package Email::Address::XS; 4 5use 5.006; 6use strict; 7use warnings; 8 9our $VERSION = '1.04'; 10 11use Carp; 12 13use base 'Exporter'; 14our @EXPORT_OK = qw(parse_email_addresses parse_email_groups format_email_addresses format_email_groups compose_address split_address); 15 16use XSLoader; 17XSLoader::load(__PACKAGE__, $VERSION); 18 19=head1 NAME 20 21Email::Address::XS - Parse and format RFC 5322 email addresses and groups 22 23=head1 SYNOPSIS 24 25 use Email::Address::XS; 26 27 my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue', comment => 'Records Department'); 28 print $winstons_address->address(); 29 # winston.smith@recdep.minitrue 30 31 my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue'); 32 print $julias_address->format(); 33 # Julia <julia@ficdep.minitrue> 34 35 my $users_address = Email::Address::XS->parse('user <user@oceania>'); 36 print $users_address->host(); 37 # oceania 38 39 my $goldsteins_address = Email::Address::XS->parse_bare_address('goldstein@brotherhood.oceania'); 40 print $goldsteins_address->user(); 41 # goldstein 42 43 my @addresses = Email::Address::XS->parse('"Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>'); 44 # ($winstons_address, $julias_address) 45 46 47 use Email::Address::XS qw(format_email_addresses format_email_groups parse_email_addresses parse_email_groups); 48 49 my $addresses_string = format_email_addresses($winstons_address, $julias_address, $users_address); 50 # "Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>, user <user@oceania> 51 52 my @addresses = map { $_->address() } parse_email_addresses($addresses_string); 53 # ('winston.smith@recdep.minitrue', 'julia@ficdep.minitrue', 'user@oceania') 54 55 my $groups_string = format_email_groups('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ]); 56 # Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>;, user <user@oceania> 57 58 my @groups = parse_email_groups($groups_string); 59 # ('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ]) 60 61 62 use Email::Address::XS qw(compose_address split_address); 63 64 my ($user, $host) = split_address('julia(outer party)@ficdep.minitrue'); 65 # ('julia', 'ficdep.minitrue') 66 67 my $string = compose_address('charrington"@"shop', 'thought.police.oceania'); 68 # "charrington\"@\"shop"@thought.police.oceania 69 70=head1 DESCRIPTION 71 72This module implements L<RFC 5322|https://tools.ietf.org/html/rfc5322> 73parser and formatter of email addresses and groups. It parses an input 74string from email headers which contain a list of email addresses or 75a groups of email addresses (like From, To, Cc, Bcc, Reply-To, Sender, 76...). Also it can generate a string value for those headers from a 77list of email addresses objects. Module is backward compatible with 78L<RFC 2822|https://tools.ietf.org/html/rfc2822> and 79L<RFC 822|https://tools.ietf.org/html/rfc822>. 80 81Parser and formatter functionality is implemented in XS and uses 82shared code from Dovecot IMAP server. 83 84It is a drop-in replacement for L<the Email::Address module|Email::Address> 85which has several security issues. E.g. issue L<CVE-2015-7686 (Algorithmic complexity vulnerability)|https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-7686>, 86which allows remote attackers to cause denial of service, is still 87present in L<Email::Address|Email::Address> version 1.908. 88 89Email::Address::XS module was created to finally fix CVE-2015-7686. 90 91Existing applications that use Email::Address module could be easily 92switched to Email::Address::XS module. In most cases only changing 93C<use Email::Address> to C<use Email::Address::XS> and replacing every 94C<Email::Address> occurrence with C<Email::Address::XS> is sufficient. 95 96So unlike L<Email::Address|Email::Address>, this module does not use 97regular expressions for parsing but instead native XS implementation 98parses input string sequentially according to RFC 5322 grammar. 99 100Additionally it has support also for named groups and so can be use 101instead of L<the Email::Address::List module|Email::Address::List>. 102 103If you are looking for the module which provides object representation 104for the list of email addresses suitable for the MIME email headers, 105see L<Email::MIME::Header::AddressList|Email::MIME::Header::AddressList>. 106 107=head2 EXPORT 108 109None by default. Exportable functions are: 110L<C<parse_email_addresses>|/parse_email_addresses>, 111L<C<parse_email_groups>|/parse_email_groups>, 112L<C<format_email_addresses>|/format_email_addresses>, 113L<C<format_email_groups>|/format_email_groups>, 114L<C<compose_address>|/compose_address>, 115L<C<split_address>|/split_address>. 116 117=head2 Exportable Functions 118 119=over 4 120 121=item format_email_addresses 122 123 use Email::Address::XS qw(format_email_addresses); 124 125 my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston@recdep.minitrue'); 126 my $julias_address = Email::Address::XS->new(phrase => 'Julia', address => 'julia@ficdep.minitrue'); 127 my @addresses = ($winstons_address, $julias_address); 128 my $string = format_email_addresses(@addresses); 129 print $string; 130 # "Winston Smith" <winston@recdep.minitrue>, Julia <julia@ficdep.minitrue> 131 132Takes a list of email address objects and returns one formatted string 133of those email addresses. 134 135=cut 136 137sub format_email_addresses { 138 my (@args) = @_; 139 return format_email_groups(undef, \@args); 140} 141 142=item format_email_groups 143 144 use Email::Address::XS qw(format_email_groups); 145 146 my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue'); 147 my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue'); 148 my $users_address = Email::Address::XS->new(address => 'user@oceania'); 149 150 my $groups_string = format_email_groups('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ]); 151 print $groups_string; 152 # Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>;, user@oceania 153 154 my $undisclosed_string = format_email_groups('undisclosed-recipients' => []); 155 print $undisclosed_string; 156 # undisclosed-recipients:; 157 158Like L<C<format_email_addresses>|/format_email_addresses> but this 159method takes pairs which consist of a group display name and a 160reference to address list. If a group is not undef then address 161list is formatted inside named group. 162 163=item parse_email_addresses 164 165 use Email::Address::XS qw(parse_email_addresses); 166 167 my $string = '"Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>, user@oceania'; 168 my @addresses = parse_email_addresses($string); 169 # @addresses now contains three Email::Address::XS objects, one for each address 170 171Parses an input string and returns a list of Email::Address::XS 172objects. Optional second string argument specifies class name for 173blessing new objects. 174 175=cut 176 177sub parse_email_addresses { 178 my (@args) = @_; 179 my $t = 1; 180 return map { @{$_} } grep { $t ^= 1 } parse_email_groups(@args); 181} 182 183=item parse_email_groups 184 185 use Email::Address::XS qw(parse_email_groups); 186 187 my $string = 'Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>;, user@oceania, undisclosed-recipients:;'; 188 my @groups = parse_email_groups($string); 189 # @groups now contains list ('Brotherhood' => [ $winstons_object, $julias_object ], undef() => [ $users_object ], 'undisclosed-recipients' => []) 190 191Like L<C<parse_email_addresses>|/parse_email_addresses> but this 192function returns a list of pairs: a group display name and a 193reference to a list of addresses which belongs to that named group. 194An undef value for a group means that a following list of addresses 195is not inside any named group. An output is in a same format as a 196input for the function L<C<format_email_groups>|/format_email_groups>. 197This function preserves order of groups and does not do any 198de-duplication or merging. 199 200=item compose_address 201 202 use Email::Address::XS qw(compose_address); 203 my $string_address = compose_address($user, $host); 204 205Takes an unescaped user part and unescaped host part of an address 206and returns escaped address. 207 208Available since version 1.01. 209 210=item split_address 211 212 use Email::Address::XS qw(split_address); 213 my ($user, $host) = split_address($string_address); 214 215Takes an escaped address and split it into pair of unescaped user 216part and unescaped host part of address. If splitting input address 217into these two parts is not possible then this function returns 218pair of undefs. 219 220Available since version 1.01. 221 222=back 223 224=head2 Class Methods 225 226=over 4 227 228=item new 229 230 my $empty_address = Email::Address::XS->new(); 231 my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue', comment => 'Records Department'); 232 my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue'); 233 my $users_address = Email::Address::XS->new(address => 'user@oceania'); 234 my $only_name = Email::Address::XS->new(phrase => 'Name'); 235 my $copy_of_winstons_address = Email::Address::XS->new(copy => $winstons_address); 236 237Constructs and returns a new C<Email::Address::XS> object. Takes named 238list of arguments: phrase, address, user, host, comment and copy. 239An argument address takes precedence over user and host. 240 241When an argument copy is specified then it is expected an 242Email::Address::XS object and a cloned copy of that object is 243returned. All other parameters are ignored. 244 245Old syntax L<from the Email::Address module|Email::Address/new> is 246supported too. Takes one to four positional arguments: phrase, address 247comment, and original string. Passing an argument original is 248deprecated, ignored and throws a warning. 249 250=cut 251 252sub new { 253 my ($class, @args) = @_; 254 255 my %hash_keys = (phrase => 1, address => 1, user => 1, host => 1, comment => 1, copy => 1); 256 my $is_hash; 257 if ( scalar @args == 2 and defined $args[0] ) { 258 $is_hash = 1 if exists $hash_keys{$args[0]}; 259 } elsif ( scalar @args == 4 and defined $args[0] and defined $args[2] ) { 260 $is_hash = 1 if exists $hash_keys{$args[0]} and exists $hash_keys{$args[2]}; 261 } elsif ( scalar @args > 4 ) { 262 $is_hash = 1; 263 } 264 265 my %args; 266 if ( $is_hash ) { 267 %args = @args; 268 } else { 269 carp 'Argument original is deprecated and ignored' if scalar @args > 3; 270 $args{comment} = $args[2] if scalar @args > 2; 271 $args{address} = $args[1] if scalar @args > 1; 272 $args{phrase} = $args[0] if scalar @args > 0; 273 } 274 275 my $invalid; 276 my $original; 277 if ( exists $args{copy} ) { 278 if ( $class->is_obj($args{copy}) ) { 279 $args{phrase} = $args{copy}->phrase(); 280 $args{comment} = $args{copy}->comment(); 281 $args{user} = $args{copy}->user(); 282 $args{host} = $args{copy}->host(); 283 $invalid = $args{copy}->{invalid}; 284 $original = $args{copy}->{original}; 285 delete $args{address}; 286 } else { 287 carp 'Named argument copy does not contain a valid object'; 288 } 289 } 290 291 my $self = bless {}, $class; 292 293 $self->phrase($args{phrase}); 294 $self->comment($args{comment}); 295 296 if ( exists $args{address} ) { 297 $self->address($args{address}); 298 } else { 299 $self->user($args{user}); 300 $self->host($args{host}); 301 } 302 303 $self->{invalid} = 1 if $invalid; 304 $self->{original} = $original; 305 306 return $self; 307} 308 309=item parse 310 311 my $winstons_address = Email::Address::XS->parse('"Winston Smith" <winston.smith@recdep.minitrue> (Records Department)'); 312 my @users_addresses = Email::Address::XS->parse('user1@oceania, user2@oceania'); 313 314Parses an input string and returns a list of an Email::Address::XS 315objects. Same as the function L<C<parse_email_addresses>|/parse_email_addresses> 316but this one is class method. 317 318In scalar context this function returns just first parsed object. 319If more then one object was parsed then L<C<is_valid>|/is_valid> 320method on returned object returns false. If no object was parsed 321then empty Email::Address::XS object is returned. 322 323Prior to version 1.01 return value in scalar context is undef when 324no object was parsed. 325 326=cut 327 328sub parse { 329 my ($class, $string) = @_; 330 my @addresses = parse_email_addresses($string, $class); 331 return @addresses if wantarray; 332 my $self = @addresses ? $addresses[0] : Email::Address::XS->new(); 333 $self->{invalid} = 1 if scalar @addresses != 1; 334 $self->{original} = $string unless defined $self->{original}; 335 return $self; 336} 337 338=item parse_bare_address 339 340 my $winstons_address = Email::Address::XS->parse_bare_address('winston.smith@recdep.minitrue'); 341 342Parses an input string as one bare email address (addr spec) which 343does not allow phrase part or angle brackets around email address and 344returns an Email::Address::XS object. It is just a wrapper around 345L<C<address>|/address> method. Method L<C<is_valid>|/is_valid> can be 346used to check if parsing was successful. 347 348Available since version 1.01. 349 350=cut 351 352sub parse_bare_address { 353 my ($class, $string) = @_; 354 my $self = $class->new(); 355 if ( defined $string ) { 356 $self->address($string); 357 $self->{original} = $string; 358 } else { 359 carp 'Use of uninitialized value for string'; 360 } 361 return $self; 362} 363 364=back 365 366=head2 Object Methods 367 368=over 4 369 370=item format 371 372 my $string = $address->format(); 373 374Returns formatted Email::Address::XS object as a string. This method 375throws a warning when L<C<user>|/user> or L<C<host>|/host> part of 376the email address is invalid or empty string. 377 378=cut 379 380sub format { 381 my ($self) = @_; 382 return format_email_addresses($self); 383} 384 385=item is_valid 386 387 my $is_valid = $address->is_valid(); 388 389Returns true if the parse function or method which created this 390Email::Address::XS object had not received any syntax error on input 391string and also that L<C<user>|/user> and L<C<host>|/host> part of 392the email address are not empty strings. 393 394Thus this function can be used for checking if Email::Address::XS 395object is valid before calling L<C<format>|/format> method on it. 396 397Available since version 1.01. 398 399=cut 400 401sub is_valid { 402 my ($self) = @_; 403 my $user = $self->user(); 404 my $host = $self->host(); 405 return (defined $user and defined $host and length $host and not $self->{invalid}); 406} 407 408=item phrase 409 410 my $phrase = $address->phrase(); 411 $address->phrase('Winston Smith'); 412 413Accessor and mutator for the phrase (display name). 414 415=cut 416 417sub phrase { 418 my ($self, @args) = @_; 419 return $self->{phrase} unless @args; 420 delete $self->{invalid} if exists $self->{invalid}; 421 return $self->{phrase} = $args[0]; 422} 423 424=item user 425 426 my $user = $address->user(); 427 $address->user('winston.smith'); 428 429Accessor and mutator for the unescaped user (local/mailbox) part of 430an address. 431 432=cut 433 434sub user { 435 my ($self, @args) = @_; 436 return $self->{user} unless @args; 437 delete $self->{cached_address} if exists $self->{cached_address}; 438 delete $self->{invalid} if exists $self->{invalid}; 439 return $self->{user} = $args[0]; 440} 441 442=item host 443 444 my $host = $address->host(); 445 $address->host('recdep.minitrue'); 446 447Accessor and mutator for the unescaped host (domain) part of an address. 448 449Since version 1.03 this method checks if setting a new value is syntactically 450valid. If not undef is set and returned. 451 452=cut 453 454sub host { 455 my ($self, @args) = @_; 456 return $self->{host} unless @args; 457 delete $self->{cached_address} if exists $self->{cached_address}; 458 delete $self->{invalid} if exists $self->{invalid}; 459 if (defined $args[0] and $args[0] =~ /^(?:\[.*\]|[^\x00-\x20\x7F()<>\[\]:;@\\,"]+)$/) { 460 return $self->{host} = $args[0]; 461 } else { 462 return $self->{host} = undef; 463 } 464} 465 466=item address 467 468 my $string_address = $address->address(); 469 $address->address('winston.smith@recdep.minitrue'); 470 471Accessor and mutator for the escaped address (addr spec). 472 473Internally this module stores a user and a host part of an address 474separately. Function L<C<compose_address>|/compose_address> is used 475for composing full address and function L<C<split_address>|/split_address> 476for splitting into a user and a host parts. If splitting new address 477into these two parts is not possible then this method returns undef 478and sets both parts to undef. 479 480=cut 481 482sub address { 483 my ($self, @args) = @_; 484 my $user; 485 my $host; 486 if ( @args ) { 487 delete $self->{invalid} if exists $self->{invalid}; 488 ($user, $host) = split_address($args[0]) if defined $args[0]; 489 if ( not defined $user or not defined $host ) { 490 $user = undef; 491 $host = undef; 492 } 493 $self->{user} = $user; 494 $self->{host} = $host; 495 } else { 496 return $self->{cached_address} if exists $self->{cached_address}; 497 $user = $self->user(); 498 $host = $self->host(); 499 } 500 if ( defined $user and defined $host and length $host ) { 501 return $self->{cached_address} = compose_address($user, $host); 502 } else { 503 return $self->{cached_address} = undef; 504 } 505} 506 507=item comment 508 509 my $comment = $address->comment(); 510 $address->comment('Records Department'); 511 512Accessor and mutator for the comment which is formatted after an 513address. A comment can contain another nested comments in round 514brackets. When setting new comment this method check if brackets are 515balanced. If not undef is set and returned. 516 517=cut 518 519sub comment { 520 my ($self, @args) = @_; 521 return $self->{comment} unless @args; 522 delete $self->{invalid} if exists $self->{invalid}; 523 return $self->{comment} = undef unless defined $args[0]; 524 my $count = 0; 525 my $cleaned = $args[0]; 526 $cleaned =~ s/(?:\\.|[^\(\)\x00])//g; 527 foreach ( split //, $cleaned ) { 528 $count++ if $_ eq '('; 529 $count-- if $_ eq ')'; 530 $count = -1 if $_ eq "\x00"; 531 last if $count < 0; 532 } 533 return $self->{comment} = undef if $count != 0; 534 return $self->{comment} = $args[0]; 535} 536 537=item name 538 539 my $name = $address->name(); 540 541This method tries to return a name which belongs to the address. It 542returns either L<C<phrase>|/phrase> or L<C<comment>|/comment> or 543L<C<user>|/user> part of the address or empty string (first defined 544value in this order). But it never returns undef. 545 546=cut 547 548sub name { 549 my ($self) = @_; 550 my $phrase = $self->phrase(); 551 return $phrase if defined $phrase and length $phrase; 552 my $comment = $self->comment(); 553 return $comment if defined $comment and length $comment; 554 my $user = $self->user(); 555 return $user if defined $user; 556 return ''; 557} 558 559=item as_string 560 561 my $address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston.smith@recdep.minitrue'); 562 my $stringified = $address->as_string(); 563 564This method is used for object L<stringification|/stringify>. It 565returns string representation of object. By default object is 566stringified to L<C<format>|/format>. 567 568Available since version 1.01. 569 570=cut 571 572our $STRINGIFY; # deprecated 573 574sub as_string { 575 my ($self) = @_; 576 return $self->format() unless defined $STRINGIFY; 577 carp 'Variable $Email::Address::XS::STRINGIFY is deprecated; subclass instead'; 578 my $method = $self->can($STRINGIFY); 579 croak 'Stringify method ' . $STRINGIFY . ' does not exist' unless defined $method; 580 return $method->($self); 581} 582 583=item original 584 585 my $address = Email::Address::XS->parse('(Winston) "Smith" <winston.smith@recdep.minitrue> (Minitrue)'); 586 my $original = $address->original(); 587 # (Winston) "Smith" <winston.smith@recdep.minitrue> (Minitrue) 588 my $format = $address->format(); 589 # Smith <winston.smith@recdep.minitrue> (Minitrue) 590 591This method returns original part of the string which was used for 592parsing current Email::Address::XS object. If object was not created 593by parsing input string, then this method returns undef. 594 595Note that L<C<format>|/format> method does not have to return same 596original string. 597 598Available since version 1.01. 599 600=cut 601 602sub original { 603 my ($self) = @_; 604 return $self->{original}; 605} 606 607=back 608 609=head2 Overloaded Operators 610 611=over 4 612 613=item stringify 614 615 my $address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston.smith@recdep.minitrue'); 616 print "Winston's address is $address."; 617 # Winston's address is "Winston Smith" <winston.smith@recdep.minitrue>. 618 619Stringification is done by method L<C<as_string>|/as_string>. 620 621=cut 622 623use overload '""' => \&as_string; 624 625=back 626 627=head2 Deprecated Functions and Variables 628 629For compatibility with L<the Email::Address module|Email::Address> 630there are defined some deprecated functions and variables. 631Do not use them in new code. Their usage throws warnings. 632 633Altering deprecated variable C<$Email::Address::XS::STRINGIFY> changes 634method which is called for objects stringification. 635 636Deprecated cache functions C<purge_cache>, C<disable_cache> and 637C<enable_cache> are noop and do nothing. 638 639=cut 640 641sub purge_cache { 642 carp 'Function purge_cache is deprecated and does nothing'; 643} 644 645sub disable_cache { 646 carp 'Function disable_cache is deprecated and does nothing'; 647} 648 649sub enable_cache { 650 carp 'Function enable_cache is deprecated and does nothing'; 651} 652 653=head1 SEE ALSO 654 655L<RFC 822|https://tools.ietf.org/html/rfc822>, 656L<RFC 2822|https://tools.ietf.org/html/rfc2822>, 657L<RFC 5322|https://tools.ietf.org/html/rfc5322>, 658L<Email::MIME::Header::AddressList>, 659L<Email::Address>, 660L<Email::Address::List>, 661L<Email::AddressParser> 662 663=head1 AUTHOR 664 665Pali E<lt>pali@cpan.orgE<gt> 666 667=head1 COPYRIGHT AND LICENSE 668 669Copyright (C) 2015-2018 by Pali E<lt>pali@cpan.orgE<gt> 670 671This library is free software; you can redistribute it and/or modify 672it under the same terms as Perl itself, either Perl version 5.6.0 or, 673at your option, any later version of Perl 5 you may have available. 674 675Dovecot parser is licensed under The MIT License and copyrighted by 676Dovecot authors. 677 678=cut 679 6801; 681