1# Copyright (c) 2015-2018 by Pali <pali@cpan.org>
2
3package Email::Address::XS;
4
5use 5.006;
6use strict;
7use warnings;
8
9our $VERSION = '1.04';
10
11use Carp;
12
13use base 'Exporter';
14our @EXPORT_OK = qw(parse_email_addresses parse_email_groups format_email_addresses format_email_groups compose_address split_address);
15
16use XSLoader;
17XSLoader::load(__PACKAGE__, $VERSION);
18
19=head1 NAME
20
21Email::Address::XS - Parse and format RFC 5322 email addresses and groups
22
23=head1 SYNOPSIS
24
25  use Email::Address::XS;
26
27  my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue', comment => 'Records Department');
28  print $winstons_address->address();
29  # winston.smith@recdep.minitrue
30
31  my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue');
32  print $julias_address->format();
33  # Julia <julia@ficdep.minitrue>
34
35  my $users_address = Email::Address::XS->parse('user <user@oceania>');
36  print $users_address->host();
37  # oceania
38
39  my $goldsteins_address = Email::Address::XS->parse_bare_address('goldstein@brotherhood.oceania');
40  print $goldsteins_address->user();
41  # goldstein
42
43  my @addresses = Email::Address::XS->parse('"Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>');
44  # ($winstons_address, $julias_address)
45
46
47  use Email::Address::XS qw(format_email_addresses format_email_groups parse_email_addresses parse_email_groups);
48
49  my $addresses_string = format_email_addresses($winstons_address, $julias_address, $users_address);
50  # "Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>, user <user@oceania>
51
52  my @addresses = map { $_->address() } parse_email_addresses($addresses_string);
53  # ('winston.smith@recdep.minitrue', 'julia@ficdep.minitrue', 'user@oceania')
54
55  my $groups_string = format_email_groups('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ]);
56  # Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue> (Records Department), Julia <julia@ficdep.minitrue>;, user <user@oceania>
57
58  my @groups = parse_email_groups($groups_string);
59  # ('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ])
60
61
62  use Email::Address::XS qw(compose_address split_address);
63
64  my ($user, $host) = split_address('julia(outer party)@ficdep.minitrue');
65  # ('julia', 'ficdep.minitrue')
66
67  my $string = compose_address('charrington"@"shop', 'thought.police.oceania');
68  # "charrington\"@\"shop"@thought.police.oceania
69
70=head1 DESCRIPTION
71
72This module implements L<RFC 5322|https://tools.ietf.org/html/rfc5322>
73parser and formatter of email addresses and groups. It parses an input
74string from email headers which contain a list of email addresses or
75a groups of email addresses (like From, To, Cc, Bcc, Reply-To, Sender,
76...). Also it can generate a string value for those headers from a
77list of email addresses objects. Module is backward compatible with
78L<RFC 2822|https://tools.ietf.org/html/rfc2822> and
79L<RFC 822|https://tools.ietf.org/html/rfc822>.
80
81Parser and formatter functionality is implemented in XS and uses
82shared code from Dovecot IMAP server.
83
84It is a drop-in replacement for L<the Email::Address module|Email::Address>
85which has several security issues. E.g. issue L<CVE-2015-7686 (Algorithmic complexity vulnerability)|https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-7686>,
86which allows remote attackers to cause denial of service, is still
87present in L<Email::Address|Email::Address> version 1.908.
88
89Email::Address::XS module was created to finally fix CVE-2015-7686.
90
91Existing applications that use Email::Address module could be easily
92switched to Email::Address::XS module. In most cases only changing
93C<use Email::Address> to C<use Email::Address::XS> and replacing every
94C<Email::Address> occurrence with C<Email::Address::XS> is sufficient.
95
96So unlike L<Email::Address|Email::Address>, this module does not use
97regular expressions for parsing but instead native XS implementation
98parses input string sequentially according to RFC 5322 grammar.
99
100Additionally it has support also for named groups and so can be use
101instead of L<the Email::Address::List module|Email::Address::List>.
102
103If you are looking for the module which provides object representation
104for the list of email addresses suitable for the MIME email headers,
105see L<Email::MIME::Header::AddressList|Email::MIME::Header::AddressList>.
106
107=head2 EXPORT
108
109None by default. Exportable functions are:
110L<C<parse_email_addresses>|/parse_email_addresses>,
111L<C<parse_email_groups>|/parse_email_groups>,
112L<C<format_email_addresses>|/format_email_addresses>,
113L<C<format_email_groups>|/format_email_groups>,
114L<C<compose_address>|/compose_address>,
115L<C<split_address>|/split_address>.
116
117=head2 Exportable Functions
118
119=over 4
120
121=item format_email_addresses
122
123  use Email::Address::XS qw(format_email_addresses);
124
125  my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston@recdep.minitrue');
126  my $julias_address = Email::Address::XS->new(phrase => 'Julia', address => 'julia@ficdep.minitrue');
127  my @addresses = ($winstons_address, $julias_address);
128  my $string = format_email_addresses(@addresses);
129  print $string;
130  # "Winston Smith" <winston@recdep.minitrue>, Julia <julia@ficdep.minitrue>
131
132Takes a list of email address objects and returns one formatted string
133of those email addresses.
134
135=cut
136
137sub format_email_addresses {
138	my (@args) = @_;
139	return format_email_groups(undef, \@args);
140}
141
142=item format_email_groups
143
144  use Email::Address::XS qw(format_email_groups);
145
146  my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue');
147  my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue');
148  my $users_address = Email::Address::XS->new(address => 'user@oceania');
149
150  my $groups_string = format_email_groups('Brotherhood' => [ $winstons_address, $julias_address ], undef() => [ $users_address ]);
151  print $groups_string;
152  # Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>;, user@oceania
153
154  my $undisclosed_string = format_email_groups('undisclosed-recipients' => []);
155  print $undisclosed_string;
156  # undisclosed-recipients:;
157
158Like L<C<format_email_addresses>|/format_email_addresses> but this
159method takes pairs which consist of a group display name and a
160reference to address list. If a group is not undef then address
161list is formatted inside named group.
162
163=item parse_email_addresses
164
165  use Email::Address::XS qw(parse_email_addresses);
166
167  my $string = '"Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>, user@oceania';
168  my @addresses = parse_email_addresses($string);
169  # @addresses now contains three Email::Address::XS objects, one for each address
170
171Parses an input string and returns a list of Email::Address::XS
172objects. Optional second string argument specifies class name for
173blessing new objects.
174
175=cut
176
177sub parse_email_addresses {
178	my (@args) = @_;
179	my $t = 1;
180	return map { @{$_} } grep { $t ^= 1 } parse_email_groups(@args);
181}
182
183=item parse_email_groups
184
185  use Email::Address::XS qw(parse_email_groups);
186
187  my $string = 'Brotherhood: "Winston Smith" <winston.smith@recdep.minitrue>, Julia <julia@ficdep.minitrue>;, user@oceania, undisclosed-recipients:;';
188  my @groups = parse_email_groups($string);
189  # @groups now contains list ('Brotherhood' => [ $winstons_object, $julias_object ], undef() => [ $users_object ], 'undisclosed-recipients' => [])
190
191Like L<C<parse_email_addresses>|/parse_email_addresses> but this
192function returns a list of pairs: a group display name and a
193reference to a list of addresses which belongs to that named group.
194An undef value for a group means that a following list of addresses
195is not inside any named group. An output is in a same format as a
196input for the function L<C<format_email_groups>|/format_email_groups>.
197This function preserves order of groups and does not do any
198de-duplication or merging.
199
200=item compose_address
201
202  use Email::Address::XS qw(compose_address);
203  my $string_address = compose_address($user, $host);
204
205Takes an unescaped user part and unescaped host part of an address
206and returns escaped address.
207
208Available since version 1.01.
209
210=item split_address
211
212  use Email::Address::XS qw(split_address);
213  my ($user, $host) = split_address($string_address);
214
215Takes an escaped address and split it into pair of unescaped user
216part and unescaped host part of address. If splitting input address
217into these two parts is not possible then this function returns
218pair of undefs.
219
220Available since version 1.01.
221
222=back
223
224=head2 Class Methods
225
226=over 4
227
228=item new
229
230  my $empty_address = Email::Address::XS->new();
231  my $winstons_address = Email::Address::XS->new(phrase => 'Winston Smith', user => 'winston.smith', host => 'recdep.minitrue', comment => 'Records Department');
232  my $julias_address = Email::Address::XS->new('Julia', 'julia@ficdep.minitrue');
233  my $users_address = Email::Address::XS->new(address => 'user@oceania');
234  my $only_name = Email::Address::XS->new(phrase => 'Name');
235  my $copy_of_winstons_address = Email::Address::XS->new(copy => $winstons_address);
236
237Constructs and returns a new C<Email::Address::XS> object. Takes named
238list of arguments: phrase, address, user, host, comment and copy.
239An argument address takes precedence over user and host.
240
241When an argument copy is specified then it is expected an
242Email::Address::XS object and a cloned copy of that object is
243returned. All other parameters are ignored.
244
245Old syntax L<from the Email::Address module|Email::Address/new> is
246supported too. Takes one to four positional arguments: phrase, address
247comment, and original string. Passing an argument original is
248deprecated, ignored and throws a warning.
249
250=cut
251
252sub new {
253	my ($class, @args) = @_;
254
255	my %hash_keys = (phrase => 1, address => 1, user => 1, host => 1, comment => 1, copy => 1);
256	my $is_hash;
257	if ( scalar @args == 2 and defined $args[0] ) {
258		$is_hash = 1 if exists $hash_keys{$args[0]};
259	} elsif ( scalar @args == 4 and defined $args[0] and defined $args[2] ) {
260		$is_hash = 1 if exists $hash_keys{$args[0]} and exists $hash_keys{$args[2]};
261	} elsif ( scalar @args > 4 ) {
262		$is_hash = 1;
263	}
264
265	my %args;
266	if ( $is_hash ) {
267		%args = @args;
268	} else {
269		carp 'Argument original is deprecated and ignored' if scalar @args > 3;
270		$args{comment} = $args[2] if scalar @args > 2;
271		$args{address} = $args[1] if scalar @args > 1;
272		$args{phrase} = $args[0] if scalar @args > 0;
273	}
274
275	my $invalid;
276	my $original;
277	if ( exists $args{copy} ) {
278		if ( $class->is_obj($args{copy}) ) {
279			$args{phrase} = $args{copy}->phrase();
280			$args{comment} = $args{copy}->comment();
281			$args{user} = $args{copy}->user();
282			$args{host} = $args{copy}->host();
283			$invalid = $args{copy}->{invalid};
284			$original = $args{copy}->{original};
285			delete $args{address};
286		} else {
287			carp 'Named argument copy does not contain a valid object';
288		}
289	}
290
291	my $self = bless {}, $class;
292
293	$self->phrase($args{phrase});
294	$self->comment($args{comment});
295
296	if ( exists $args{address} ) {
297		$self->address($args{address});
298	} else {
299		$self->user($args{user});
300		$self->host($args{host});
301	}
302
303	$self->{invalid} = 1 if $invalid;
304	$self->{original} = $original;
305
306	return $self;
307}
308
309=item parse
310
311  my $winstons_address = Email::Address::XS->parse('"Winston Smith" <winston.smith@recdep.minitrue> (Records Department)');
312  my @users_addresses = Email::Address::XS->parse('user1@oceania, user2@oceania');
313
314Parses an input string and returns a list of an Email::Address::XS
315objects. Same as the function L<C<parse_email_addresses>|/parse_email_addresses>
316but this one is class method.
317
318In scalar context this function returns just first parsed object.
319If more then one object was parsed then L<C<is_valid>|/is_valid>
320method on returned object returns false. If no object was parsed
321then empty Email::Address::XS object is returned.
322
323Prior to version 1.01 return value in scalar context is undef when
324no object was parsed.
325
326=cut
327
328sub parse {
329	my ($class, $string) = @_;
330	my @addresses = parse_email_addresses($string, $class);
331	return @addresses if wantarray;
332	my $self = @addresses ? $addresses[0] : Email::Address::XS->new();
333	$self->{invalid} = 1 if scalar @addresses != 1;
334	$self->{original} = $string unless defined $self->{original};
335	return $self;
336}
337
338=item parse_bare_address
339
340  my $winstons_address = Email::Address::XS->parse_bare_address('winston.smith@recdep.minitrue');
341
342Parses an input string as one bare email address (addr spec) which
343does not allow phrase part or angle brackets around email address and
344returns an Email::Address::XS object. It is just a wrapper around
345L<C<address>|/address> method. Method L<C<is_valid>|/is_valid> can be
346used to check if parsing was successful.
347
348Available since version 1.01.
349
350=cut
351
352sub parse_bare_address {
353	my ($class, $string) = @_;
354	my $self = $class->new();
355	if ( defined $string ) {
356		$self->address($string);
357		$self->{original} = $string;
358	} else {
359		carp 'Use of uninitialized value for string';
360	}
361	return $self;
362}
363
364=back
365
366=head2 Object Methods
367
368=over 4
369
370=item format
371
372  my $string = $address->format();
373
374Returns formatted Email::Address::XS object as a string. This method
375throws a warning when L<C<user>|/user> or L<C<host>|/host> part of
376the email address is invalid or empty string.
377
378=cut
379
380sub format {
381	my ($self) = @_;
382	return format_email_addresses($self);
383}
384
385=item is_valid
386
387  my $is_valid = $address->is_valid();
388
389Returns true if the parse function or method which created this
390Email::Address::XS object had not received any syntax error on input
391string and also that L<C<user>|/user> and L<C<host>|/host> part of
392the email address are not empty strings.
393
394Thus this function can be used for checking if Email::Address::XS
395object is valid before calling L<C<format>|/format> method on it.
396
397Available since version 1.01.
398
399=cut
400
401sub is_valid {
402	my ($self) = @_;
403	my $user = $self->user();
404	my $host = $self->host();
405	return (defined $user and defined $host and length $host and not $self->{invalid});
406}
407
408=item phrase
409
410  my $phrase = $address->phrase();
411  $address->phrase('Winston Smith');
412
413Accessor and mutator for the phrase (display name).
414
415=cut
416
417sub phrase {
418	my ($self, @args) = @_;
419	return $self->{phrase} unless @args;
420	delete $self->{invalid} if exists $self->{invalid};
421	return $self->{phrase} = $args[0];
422}
423
424=item user
425
426  my $user = $address->user();
427  $address->user('winston.smith');
428
429Accessor and mutator for the unescaped user (local/mailbox) part of
430an address.
431
432=cut
433
434sub user {
435	my ($self, @args) = @_;
436	return $self->{user} unless @args;
437	delete $self->{cached_address} if exists $self->{cached_address};
438	delete $self->{invalid} if exists $self->{invalid};
439	return $self->{user} = $args[0];
440}
441
442=item host
443
444  my $host = $address->host();
445  $address->host('recdep.minitrue');
446
447Accessor and mutator for the unescaped host (domain) part of an address.
448
449Since version 1.03 this method checks if setting a new value is syntactically
450valid. If not undef is set and returned.
451
452=cut
453
454sub host {
455	my ($self, @args) = @_;
456	return $self->{host} unless @args;
457	delete $self->{cached_address} if exists $self->{cached_address};
458	delete $self->{invalid} if exists $self->{invalid};
459	if (defined $args[0] and $args[0] =~ /^(?:\[.*\]|[^\x00-\x20\x7F()<>\[\]:;@\\,"]+)$/) {
460		return $self->{host} = $args[0];
461	} else {
462		return $self->{host} = undef;
463	}
464}
465
466=item address
467
468  my $string_address = $address->address();
469  $address->address('winston.smith@recdep.minitrue');
470
471Accessor and mutator for the escaped address (addr spec).
472
473Internally this module stores a user and a host part of an address
474separately. Function L<C<compose_address>|/compose_address> is used
475for composing full address and function L<C<split_address>|/split_address>
476for splitting into a user and a host parts. If splitting new address
477into these two parts is not possible then this method returns undef
478and sets both parts to undef.
479
480=cut
481
482sub address {
483	my ($self, @args) = @_;
484	my $user;
485	my $host;
486	if ( @args ) {
487		delete $self->{invalid} if exists $self->{invalid};
488		($user, $host) = split_address($args[0]) if defined $args[0];
489		if ( not defined $user or not defined $host ) {
490			$user = undef;
491			$host = undef;
492		}
493		$self->{user} = $user;
494		$self->{host} = $host;
495	} else {
496		return $self->{cached_address} if exists $self->{cached_address};
497		$user = $self->user();
498		$host = $self->host();
499	}
500	if ( defined $user and defined $host and length $host ) {
501		return $self->{cached_address} = compose_address($user, $host);
502	} else {
503		return $self->{cached_address} = undef;
504	}
505}
506
507=item comment
508
509  my $comment = $address->comment();
510  $address->comment('Records Department');
511
512Accessor and mutator for the comment which is formatted after an
513address. A comment can contain another nested comments in round
514brackets. When setting new comment this method check if brackets are
515balanced. If not undef is set and returned.
516
517=cut
518
519sub comment {
520	my ($self, @args) = @_;
521	return $self->{comment} unless @args;
522	delete $self->{invalid} if exists $self->{invalid};
523	return $self->{comment} = undef unless defined $args[0];
524	my $count = 0;
525	my $cleaned = $args[0];
526	$cleaned =~ s/(?:\\.|[^\(\)\x00])//g;
527	foreach ( split //, $cleaned ) {
528		$count++ if $_ eq '(';
529		$count-- if $_ eq ')';
530		$count = -1 if $_ eq "\x00";
531		last if $count < 0;
532	}
533	return $self->{comment} = undef if $count != 0;
534	return $self->{comment} = $args[0];
535}
536
537=item name
538
539  my $name = $address->name();
540
541This method tries to return a name which belongs to the address. It
542returns either L<C<phrase>|/phrase> or L<C<comment>|/comment> or
543L<C<user>|/user> part of the address or empty string (first defined
544value in this order). But it never returns undef.
545
546=cut
547
548sub name {
549	my ($self) = @_;
550	my $phrase = $self->phrase();
551	return $phrase if defined $phrase and length $phrase;
552	my $comment = $self->comment();
553	return $comment if defined $comment and length $comment;
554	my $user = $self->user();
555	return $user if defined $user;
556	return '';
557}
558
559=item as_string
560
561  my $address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston.smith@recdep.minitrue');
562  my $stringified = $address->as_string();
563
564This method is used for object L<stringification|/stringify>. It
565returns string representation of object. By default object is
566stringified to L<C<format>|/format>.
567
568Available since version 1.01.
569
570=cut
571
572our $STRINGIFY; # deprecated
573
574sub as_string {
575	my ($self) = @_;
576	return $self->format() unless defined $STRINGIFY;
577	carp 'Variable $Email::Address::XS::STRINGIFY is deprecated; subclass instead';
578	my $method = $self->can($STRINGIFY);
579	croak 'Stringify method ' . $STRINGIFY . ' does not exist' unless defined $method;
580	return $method->($self);
581}
582
583=item original
584
585  my $address = Email::Address::XS->parse('(Winston) "Smith"   <winston.smith@recdep.minitrue> (Minitrue)');
586  my $original = $address->original();
587  # (Winston) "Smith"   <winston.smith@recdep.minitrue> (Minitrue)
588  my $format = $address->format();
589  # Smith <winston.smith@recdep.minitrue> (Minitrue)
590
591This method returns original part of the string which was used for
592parsing current Email::Address::XS object. If object was not created
593by parsing input string, then this method returns undef.
594
595Note that L<C<format>|/format> method does not have to return same
596original string.
597
598Available since version 1.01.
599
600=cut
601
602sub original {
603	my ($self) = @_;
604	return $self->{original};
605}
606
607=back
608
609=head2 Overloaded Operators
610
611=over 4
612
613=item stringify
614
615  my $address = Email::Address::XS->new(phrase => 'Winston Smith', address => 'winston.smith@recdep.minitrue');
616  print "Winston's address is $address.";
617  # Winston's address is "Winston Smith" <winston.smith@recdep.minitrue>.
618
619Stringification is done by method L<C<as_string>|/as_string>.
620
621=cut
622
623use overload '""' => \&as_string;
624
625=back
626
627=head2 Deprecated Functions and Variables
628
629For compatibility with L<the Email::Address module|Email::Address>
630there are defined some deprecated functions and variables.
631Do not use them in new code. Their usage throws warnings.
632
633Altering deprecated variable C<$Email::Address::XS::STRINGIFY> changes
634method which is called for objects stringification.
635
636Deprecated cache functions C<purge_cache>, C<disable_cache> and
637C<enable_cache> are noop and do nothing.
638
639=cut
640
641sub purge_cache {
642	carp 'Function purge_cache is deprecated and does nothing';
643}
644
645sub disable_cache {
646	carp 'Function disable_cache is deprecated and does nothing';
647}
648
649sub enable_cache {
650	carp 'Function enable_cache is deprecated and does nothing';
651}
652
653=head1 SEE ALSO
654
655L<RFC 822|https://tools.ietf.org/html/rfc822>,
656L<RFC 2822|https://tools.ietf.org/html/rfc2822>,
657L<RFC 5322|https://tools.ietf.org/html/rfc5322>,
658L<Email::MIME::Header::AddressList>,
659L<Email::Address>,
660L<Email::Address::List>,
661L<Email::AddressParser>
662
663=head1 AUTHOR
664
665Pali E<lt>pali@cpan.orgE<gt>
666
667=head1 COPYRIGHT AND LICENSE
668
669Copyright (C) 2015-2018 by Pali E<lt>pali@cpan.orgE<gt>
670
671This library is free software; you can redistribute it and/or modify
672it under the same terms as Perl itself, either Perl version 5.6.0 or,
673at your option, any later version of Perl 5 you may have available.
674
675Dovecot parser is licensed under The MIT License and copyrighted by
676Dovecot authors.
677
678=cut
679
6801;
681