1 // Copyright 2009 The Archiveopteryx Developers <info@aox.org>
2 
3 #include "addressfield.h"
4 
5 #include "ustring.h"
6 #include "codec.h"
7 
8 
9 /*! \class AddressField addressfield.h
10     Represents a field containing a list of addresses.
11 
12     This simple class encapsulates a List< Address > in a HeaderField.
13     It is responsible for parsing the field (with Address) and setting
14     the correct field value.
15 */
16 
17 
AddressField(HeaderField::Type t)18 AddressField::AddressField( HeaderField::Type t )
19     : HeaderField( t ),
20       a( new List< Address > )
21 {
22 }
23 
24 
25 /*! Constructs an AddressField of \a type, containing the single
26     Address \a address .
27 */
28 
AddressField(HeaderField::Type type,Address * address)29 AddressField::AddressField( HeaderField::Type type, Address * address )
30     : HeaderField( type ), a( new List<Address> )
31 {
32     a->append( address );
33 }
34 
35 
// Parses the field body s according to type(): the matching parse
// helper fills in the address list and the error, and for several
// field types a set of recovery heuristics then discards input that
// is known to be commonly broken, by resetting the error to "" and,
// in most cases, emptying the address list as well.
//
// Afterwards every field except Return-Path is run through
// outlawBounce(), which records an error if a '<>' address is left.
void AddressField::parse( const EString &s )
{
    switch ( type() ) {
    case HeaderField::Sender:
        parseMailbox( s );
        if ( !valid() && addresses()->isEmpty() ) {
            // sender is quite often wrong in otherwise perfectly
            // legible messages. so we'll nix out the error. Header
            // will probably remove the field completely, since an
            // empty Sender field isn't sensible.
            setError( "" );
        }
        break;

    case HeaderField::ReturnPath:
        parseMailbox( s );
        // Accept only a single address of type Bounce or Normal;
        // anything else is dropped without an error.
        if ( !valid() || addresses()->count() != 1 ||
             ( addresses()->first()->type() != Address::Bounce &&
               addresses()->first()->type() != Address::Normal ) ) {
            // return-path sometimes contains strange addresses when
            // migrating from older stores. if it does, just kill
            // it. this never happens when receiving mail, since we'll
            // make a return-path of our own.
            setError( "" );
            a->clear();
        }
        break;

    case HeaderField::ResentSender:
        // Parsed as a single mailbox; no recovery is attempted.
        parseMailbox( s );
        break;

    case HeaderField::From:
    case HeaderField::ResentFrom:
        parseMailboxList( s );
        break;

    case HeaderField::To:
    case HeaderField::Cc:
    case HeaderField::Bcc:
    case HeaderField::ReplyTo:
    case HeaderField::ResentTo:
    case HeaderField::ResentCc:
    case HeaderField::ResentBcc:
        parseAddressList( s );
        // The checks below run in sequence; each one looks at the
        // validity and list contents left behind by the ones before.
        if ( type() == HeaderField::Cc && !valid() && a->count() <= 1 ) {
            // /bin/mail tempts people to type escape, ctrl-d or
            // similar into the cc field, so we try to recover from
            // that.
            // Find the first byte that is below ' ' or equal to 127.
            // NOTE(review): if EString::operator[] yields a signed
            // char, bytes >= 0x80 also compare below ' ' and would be
            // treated like control characters here - confirm.
            uint i = 0;
            while ( i < s.length() && s[i] >= ' ' && s[i] != 127 )
                i++;
            if ( i < s.length() ) {
                setError( "" );
                a->clear();
            }
        }
        // An invalid field whose simplified() form is one character
        // long is treated as if it were empty.
        if ( !valid() && s.simplified().length() == 1 ) {
            setError( "" );
            a->clear();
        }
        if ( valid() && s.contains( "<>" ) ) {
            // some spammers attempt to send 'To: asdfsaf <>'.
            // First count the '<>' entries and the entries that carry
            // an error of their own.
            List<Address>::Iterator i( a );
            uint bounces = 0;
            uint otherProblems = 0;
            while ( i ) {
                if ( i->type() == Address::Bounce )
                    bounces++;
                else if ( !i->error().isEmpty() )
                    otherProblems++;
                ++i;
            }
            if ( bounces && !otherProblems ) {
                // there's one or more <>, but nothing else bad.
                // Remove just the '<>' entries, so outlawBounce()
                // below finds nothing to complain about.
                // NOTE(review): i is not incremented after take(), so
                // this relies on List::take() moving i on to the next
                // element - confirm against List.
                i = a->first();
                while ( i ) {
                    if ( i->type() == Address::Bounce )
                        a->take( i );
                    else
                        ++i;
                }
                setError( "" );
            }
        }
        if ( !valid() && a->isEmpty() && !s.contains( "@" ) ) {
            // some spammers send total garbage. we can't detect all
            // instances of garbage, but if it doesn't contain even
            // one "@" and also not even one parsable address, surely
            // it's garbage.
            setError( "" );
        }
        if ( !valid() && a->count() <= 1 &&
             ( s.startsWith( "@" ) || s.contains( "<@" ) ) ) {
            // some spammers send To: @hostname. forget it.
            a->clear();
            setError( "" );
        }
        break;

    case HeaderField::ContentId:
        parseContentId( s );
        break;

    case HeaderField::MessageId:
    case HeaderField::ResentMessageId:
        parseMessageId( s );
        break;

    case HeaderField::References:
        parseReferences( s );
        break;

    default:
        // Should not happen.
        break;
    }

    // '<>' is only tolerated in Return-Path; everywhere else a
    // remaining bounce address turns into an error.
    if ( type() != HeaderField::ReturnPath )
        outlawBounce();
}
157 
158 
159 /*! Generates the RFC 822 representation of the field, based on the
160     addresses(). If \a avoidUTf8 is true, rfc822() will be lossy
161     rather than include any UTF-8.
162 */
163 
rfc822(bool avoidUtf8) const164 EString AddressField::rfc822( bool avoidUtf8 ) const
165 {
166     EString s;
167     s.reserve( 30 * addresses()->count() );
168     HeaderField::Type t = type();
169     List< Address >::Iterator it( addresses() );
170 
171     if ( t == HeaderField::ReturnPath ) {
172         if ( !it )
173             ;
174         else if ( it->type() == Address::Bounce )
175             s = "<>";
176         else if ( it->type() == Address::Normal &&
177                   avoidUtf8 && it->needsUnicode() )
178             s = "<this-address@needs-unicode.invalid>";
179         else if ( it->type() == Address::Normal )
180             s = "<" + it->lpdomain() + ">";
181     }
182     else if ( t == HeaderField::MessageId ||
183               t == HeaderField::ResentMessageId ||
184               t == HeaderField::ContentId ||
185               ( t == HeaderField::References && !it ) )
186     {
187         if ( it ) {
188             s = "<" + it->toString( false ) + ">";
189         }
190         else {
191             s = name() + ": ";
192             s.append( value().ascii() );
193             s = s.simplified().wrapped( 78, "", " ", false );
194             uint p = name().length() + 1;
195             while ( p < s.length() &&
196                     ( s[p] == ' ' || s[p] == '\r' || s[p] == '\n' ) )
197                 p++;
198             s = s.mid( p );
199         }
200     }
201     else if ( t <= HeaderField::LastAddressField ||
202               t == HeaderField::References )
203     {
204         bool first = true;
205         EString wsep, lsep;
206         uint c = name().length() + 2;
207         uint lpos;
208 
209         if ( t == HeaderField::References ) {
210             wsep = " ";
211             lsep = "\r\n ";
212             lpos = 1;
213         }
214         else {
215             wsep = ", ";
216             lsep = ",\r\n    ";
217             lpos = 4;
218         }
219 
220         while ( it ) {
221             EString a = it->toString( avoidUtf8 );
222             ++it;
223 
224             if ( t == HeaderField::References )
225                 a = "<" + a + ">";
226 
227             if ( first ) {
228                 first = false;
229             }
230             else if ( ( c + wsep.length() + a.length() > 78 ) ||
231                       ( c + wsep.length() + a.length() == 78 && it ) )
232             {
233                 s.append( lsep );
234                 c = lpos;
235             }
236             else {
237                 s.append( wsep );
238                 c += wsep.length();
239             }
240             s.append( a );
241             c += a.length();
242         }
243     }
244 
245     return s;
246 }
247 
248 
value() const249 UString AddressField::value() const
250 {
251     if ( addresses()->isEmpty() )
252         return HeaderField::value();
253     // and for message-id, content-id and references:
254     AsciiCodec a;
255     return a.toUnicode( rfc822( true ).simplified() );
256 }
257 
258 
259 /*! Parses the RFC 2822 address-list production from \a s and records
260     the first problem found.
261 */
262 
parseAddressList(const EString & s)263 void AddressField::parseAddressList( const EString &s )
264 {
265     AddressParser ap( s );
266     setError( ap.error() );
267     a = ap.addresses();
268 }
269 
270 
271 /*! Parses the RFC 2822 mailbox-list production from \a s and records
272     the first problem found.
273 */
274 
parseMailboxList(const EString & s)275 void AddressField::parseMailboxList( const EString &s )
276 {
277     parseAddressList( s );
278 
279     // A mailbox-list is an address-list where groups aren't allowed.
280     List< Address >::Iterator it( a );
281     while ( it && valid() ) {
282         if ( it->type() == Address::EmptyGroup )
283             setError( "Invalid mailbox: " + it->toString( false ).quoted() );
284         ++it;
285     }
286 }
287 
288 
289 /*! Parses the RFC 2822 mailbox production from \a s and records the
290     first problem found.
291 */
292 
parseMailbox(const EString & s)293 void AddressField::parseMailbox( const EString &s )
294 {
295     parseMailboxList( s );
296 
297     // A mailbox in our world is just a mailbox-list with one entry.
298     if ( valid() && a->count() > 1 )
299         setError( "Only one address is allowed" );
300 }
301 
302 
303 /*! Parses the contents of an RFC 2822 references field in \a s. This
304     is nominally 1*msg-id, but in practice we need to be a little more
305     flexible. Overlooks common problems and records the first serious
306     problems found.
307 */
308 
parseReferences(const EString & s)309 void AddressField::parseReferences( const EString &s )
310 {
311     AddressParser *ap = AddressParser::references( s );
312     a = ap->addresses();
313     setError( ap->error() );
314 }
315 
316 
317 /*! Parses the RFC 2822 msg-id production from \a s and/or records the
318     first serious error found.
319 */
320 
parseMessageId(const EString & s)321 void AddressField::parseMessageId( const EString &s )
322 {
323     AddressParser *ap = AddressParser::references( s );
324 
325     if ( !ap->error().isEmpty() )
326         setError( ap->error() );
327     else if ( ap->addresses()->count() == 1 )
328         a = ap->addresses();
329     else
330         setError( "Need exactly one" );
331 }
332 
333 
334 /*! Like parseMessageId( \a s ), except that it also accepts <blah>. */
335 
parseContentId(const EString & s)336 void AddressField::parseContentId( const EString & s )
337 {
338     AddressParser ap( s );
339     setError( ap.error() );
340     if ( ap.addresses()->count() != 1 ) {
341         setError( "Need exactly one" );
342         return;
343     }
344 
345     switch ( ap.addresses()->first()->type() ) {
346     case Address::Normal:
347         a = ap.addresses();
348         //setData( "<" + a->lpdomain() + ">" );
349         break;
350     case Address::Bounce:
351         setError( "<> is not legal, it has to be <some@thing>" );
352         break;
353     case Address::EmptyGroup:
354         setError( "Error parsing Content-ID" );
355         break;
356     case Address::Local:
357         a = ap.addresses();
358         //setData( "<" + a->localpart() + ">" );
359         break;
360     case Address::Invalid:
361         setError( "Error parsing Content-Id" );
362         break;
363     }
364 }
365 
366 
/*! Returns a pointer to the List of addresses contained in this field.

    This is never a null pointer: both constructors allocate a list.
    The parse functions replace it with the list supplied by
    AddressParser, so the guarantee presumably also relies on
    AddressParser::addresses() never returning null.
*/

List< Address > *AddressField::addresses() const
{
    return a;
}
376 
377 
378 /*! Records \a addr as the new list of addresses in this address. */
379 
setAddresses(List<Address> * addr)380 void AddressField::setAddresses( List<Address>* addr )
381 {
382     a->clear();
383     a->append( addr );
384 }
385 
386 
387 /*! Checks whether '<>' is present in this address field, and records
388     an error if it is. '<>' is legal in Return-Path, but as of April
389     2005, not in any other field.
390 */
391 
outlawBounce()392 void AddressField::outlawBounce()
393 {
394     List< Address >::Iterator it( a );
395     while ( it && valid() ) {
396         if ( it->type() == Address::Bounce )
397             setError( "No-bounce address not allowed in this field" );
398         ++it;
399     }
400 }
401 
402 
403 /*! Returns true if any of the addresses in this field need unicode to
404     be transmitted, and false if none of them do (or if the list is
405     empty).
406 */
407 
needsUnicode() const408 bool AddressField::needsUnicode() const
409 {
410     List< Address >::Iterator it( a );
411     while ( it ) {
412         if ( it->needsUnicode() )
413             return true;
414         ++it;
415     }
416     return false;
417 }
418