1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4  *
5  * This library is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation.
8  *
9  * This library is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12  * for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library. If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Authors: Michael Zucchi <notzed@ximian.com>
18  *          Jeffrey Stedfast <fejj@ximian.com>
19  */
20 
21 #include "evolution-data-server-config.h"
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <sys/param.h>  /* for MAXHOSTNAMELEN */
28 #include <sys/stat.h>
29 #include <unistd.h>
30 #include <regex.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include <time.h>
35 
36 #ifndef MAXHOSTNAMELEN
37 #define MAXHOSTNAMELEN 1024
38 #endif
39 
40 #include "camel-charset-map.h"
41 #include "camel-iconv.h"
42 #include "camel-mime-utils.h"
43 #include "camel-net-utils.h"
44 #include "camel-string-utils.h"
45 #ifdef G_OS_WIN32
46 #include <winsock2.h>
47 #include <ws2tcpip.h>
48 #ifdef HAVE_WSPIAPI_H
49 #include <wspiapi.h>
50 #endif
51 #endif
52 #include "camel-utf8.h"
53 
54 #ifdef G_OS_WIN32
55 #ifdef gmtime_r
56 #undef gmtime_r
57 #endif
58 
59 /* The gmtime() in Microsoft's C library is MT-safe */
60 #define gmtime_r(tp,tmp) (gmtime(tp)?(*(tmp)=*gmtime(tp),(tmp)):0)
61 #endif
62 
63 #if !defined HAVE_LOCALTIME_R && !defined localtime_r
64 # ifdef _LIBC
65 #  define localtime_r __localtime_r
66 # else
67 /* Approximate localtime_r as best we can in its absence.  */
#  define localtime_r my_localtime_r
static struct tm *localtime_r (const time_t *t, struct tm *tp);

/* Stand-in for localtime_r(): copies the result of localtime() into @tp.
 * Returns @tp on success, or NULL when localtime() fails (in which case
 * @tp is left untouched). It still goes through localtime(), so it only
 * approximates the real function. */
static struct tm *
localtime_r (const time_t *t,
             struct tm *tp)
{
	struct tm *l = localtime (t);

	if (!l)
		return NULL;

	*tp = *l;

	return tp;
}
82 # endif /* !_LIBC */
83 #endif /* HAVE_LOCALTIME_R && !defined (localtime_r) */
84 
85 /* for all non-essential warnings ... */
86 #define w(x)
87 
88 #define d(x)
89 #define d2(x)
90 
/* Register CamelContentType, CamelContentDisposition and CamelHeaderAddress
 * as GLib boxed types. The third and fourth macro arguments are the boxed
 * copy and free functions, so copying a boxed value goes through the type's
 * _ref() function and freeing it goes through its _unref() function. */
G_DEFINE_BOXED_TYPE (CamelContentType,
		camel_content_type,
		camel_content_type_ref,
		camel_content_type_unref)

G_DEFINE_BOXED_TYPE (CamelContentDisposition,
		camel_content_disposition,
		camel_content_disposition_ref,
		camel_content_disposition_unref)

G_DEFINE_BOXED_TYPE (CamelHeaderAddress,
		camel_header_address,
		camel_header_address_ref,
		camel_header_address_unref)
105 
/**
 * camel_mktime_utc:
 * @tm: the #tm to convert to a calendar time representation
 *
 * Works like mktime(3), except that the broken-down time in @tm is taken
 * to be in UTC rather than in the local timezone. The tm_isdst member of
 * @tm is overwritten, and mktime() may normalize the other members.
 *
 * Returns: the calendar time representation of @tm
 *
 * Since: 3.4
 **/
time_t
camel_mktime_utc (struct tm *tm)
{
	time_t utc;

	/* let mktime() decide whether daylight saving time is in effect */
	tm->tm_isdst = -1;
	utc = mktime (tm);

	/* undo the local timezone offset that mktime() applied */
#if defined (HAVE_TM_GMTOFF)
	utc += tm->tm_gmtoff;
#elif defined (HAVE_TIMEZONE)
# if defined (HAVE_ALTZONE)
	utc -= (tm->tm_isdst > 0) ? altzone : timezone;
# else
	utc -= (tm->tm_isdst > 0) ? (timezone - 3600) : timezone;
# endif
#endif

	return utc;
}
139 
140 /**
141  * camel_localtime_with_offset:
142  * @tt: the #time_t to convert
143  * @tm: the #tm to store the result in
144  * @offset: the #gint to store the offset in
145  *
146  * Converts the calendar time representation @tt to a broken-down
147  * time representation, stored in @tm, and provides the offset in
148  * seconds from UTC time, stored in @offset.
149  **/
150 void
camel_localtime_with_offset(time_t tt,struct tm * tm,gint * offset)151 camel_localtime_with_offset (time_t tt,
152                              struct tm *tm,
153                              gint *offset)
154 {
155 	localtime_r (&tt, tm);
156 
157 #if defined (HAVE_TM_GMTOFF)
158 	*offset = tm->tm_gmtoff;
159 #elif defined (HAVE_TIMEZONE)
160 	if (tm->tm_isdst > 0) {
161 #if defined (HAVE_ALTZONE)
162 		*offset = -altzone;
163 #else
164 		*offset = -(timezone - 3600);
165 #endif
166 	} else
167 		*offset = -timezone;
168 #endif
169 }
170 
/* Map a 6-bit value to its uuencoded character: the value offset by ' ',
 * except that 0 is written as '`' rather than as a space. */
#define CAMEL_UUENCODE_CHAR(c)  ((c) ? (c) + ' ' : '`')
/* Inverse mapping: remove the ' ' offset and keep the low 6 bits (octal 077),
 * so that both ' ' and '`' yield 0. */
#define	CAMEL_UUDECODE_CHAR(c)	(((c) - ' ') & 077)

/* Upper-case hexadecimal digits, indexed by nibble value (0-15). */
static const guchar tohex[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
178 
179 /**
180  * camel_uuencode_close:
181  * @in: (array length=len): input stream
182  * @len: input stream length
183  * @out: (inout) (array): output stream
184  * @uubuf: (inout) (array fixed-size=60): temporary buffer of 60 bytes
185  * @state: (inout): holds the number of bits that are stored in @save
186  * @save: (inout) (array length=state): leftover bits that have not yet been encoded
187  *
188  * Uuencodes a chunk of data. Call this when finished encoding data
189  * with camel_uuencode_step() to flush off the last little bit.
190  *
191  * Returns: the number of bytes encoded
192  **/
193 gsize
camel_uuencode_close(guchar * in,gsize len,guchar * out,guchar * uubuf,gint * state,guint32 * save)194 camel_uuencode_close (guchar *in,
195                       gsize len,
196                       guchar *out,
197                       guchar *uubuf,
198                       gint *state,
199                       guint32 *save)
200 {
201 	register guchar *outptr, *bufptr;
202 	register guint32 saved;
203 	gint uulen, uufill, i;
204 
205 	outptr = out;
206 
207 	if (len > 0)
208 		outptr += camel_uuencode_step (in, len, out, uubuf, state, save);
209 
210 	uufill = 0;
211 
212 	saved = *save;
213 	i = *state & 0xff;
214 	uulen = (*state >> 8) & 0xff;
215 
216 	bufptr = uubuf + ((uulen / 3) * 4);
217 
218 	if (i > 0) {
219 		while (i < 3) {
220 			saved <<= 8;
221 			uufill++;
222 			i++;
223 		}
224 
225 		if (i == 3) {
226 			/* convert 3 normal bytes into 4 uuencoded bytes */
227 			guchar b0, b1, b2;
228 
229 			b0 = (saved >> 16) & 0xff;
230 			b1 = (saved >> 8) & 0xff;
231 			b2 = saved & 0xff;
232 
233 			*bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
234 			*bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
235 			*bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
236 			*bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
237 
238 			i = 0;
239 			saved = 0;
240 			uulen += 3;
241 		}
242 	}
243 
244 	if (uulen > 0) {
245 		gint cplen = ((uulen / 3) * 4);
246 
247 		*outptr++ = CAMEL_UUENCODE_CHAR ((uulen - uufill) & 0xff);
248 		memcpy (outptr, uubuf, cplen);
249 		outptr += cplen;
250 		*outptr++ = '\n';
251 		uulen = 0;
252 	}
253 
254 	*outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
255 	*outptr++ = '\n';
256 
257 	*save = 0;
258 	*state = 0;
259 
260 	return outptr - out;
261 }
262 
263 /**
264  * camel_uuencode_step:
265  * @in: (array length=len): input stream
266  * @len: input stream length
267  * @out: (inout) (array): output stream
268  * @uubuf: (inout) (array fixed-size=60): temporary buffer of 60 bytes
269  * @state: (inout): holds the number of bits that are stored in @save
270  * @save: (inout) (array length=state): leftover bits that have not yet been encoded
271  *
272  * Uuencodes a chunk of data. Performs an 'encode step', only encodes
273  * blocks of 45 characters to the output at a time, saves left-over
274  * state in @uubuf, @state and @save (initialize to 0 on first
275  * invocation).
276  *
277  * Returns: the number of bytes encoded
278  **/
279 gsize
camel_uuencode_step(guchar * in,gsize len,guchar * out,guchar * uubuf,gint * state,guint32 * save)280 camel_uuencode_step (guchar *in,
281                      gsize len,
282                      guchar *out,
283                      guchar *uubuf,
284                      gint *state,
285                      guint32 *save)
286 {
287 	register guchar *inptr, *outptr, *bufptr;
288 	guchar b0, b1, b2, *inend;
289 	register guint32 saved;
290 	gint uulen, i;
291 
292 	if (len == 0)
293 		return 0;
294 
295 	inend = in + len;
296 	outptr = out;
297 	inptr = in;
298 
299 	saved = *save;
300 	i = *state & 0xff;
301 	uulen = (*state >> 8) & 0xff;
302 
303 	if ((len + uulen) < 45) {
304 		/* not enough input to write a full uuencoded line */
305 		bufptr = uubuf + ((uulen / 3) * 4);
306 	} else {
307 		bufptr = outptr + 1;
308 
309 		if (uulen > 0) {
310 			/* copy the previous call's tmpbuf to outbuf */
311 			memcpy (bufptr, uubuf, ((uulen / 3) * 4));
312 			bufptr += ((uulen / 3) * 4);
313 		}
314 	}
315 
316 	if (i == 2) {
317 		b0 = (saved >> 8) & 0xff;
318 		b1 = saved & 0xff;
319 		saved = 0;
320 		i = 0;
321 
322 		goto skip2;
323 	} else if (i == 1) {
324 		if ((inptr + 2) < inend) {
325 			b0 = saved & 0xff;
326 			saved = 0;
327 			i = 0;
328 
329 			goto skip1;
330 		}
331 
332 		while (inptr < inend) {
333 			saved = (saved << 8) | *inptr++;
334 			i++;
335 		}
336 	}
337 
338 	while (inptr < inend) {
339 		while (uulen < 45 && (inptr + 3) <= inend) {
340 			b0 = *inptr++;
341 		skip1:
342 			b1 = *inptr++;
343 		skip2:
344 			b2 = *inptr++;
345 
346 			/* convert 3 normal bytes into 4 uuencoded bytes */
347 			*bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
348 			*bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
349 			*bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
350 			*bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
351 
352 			uulen += 3;
353 		}
354 
355 		if (uulen >= 45) {
356 			*outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
357 			outptr += ((45 / 3) * 4) + 1;
358 
359 			*outptr++ = '\n';
360 			uulen = 0;
361 
362 			if ((inptr + 45) <= inend) {
363 				/* we have enough input to output another full line */
364 				bufptr = outptr + 1;
365 			} else {
366 				bufptr = uubuf;
367 			}
368 		} else {
369 			/* not enough input to continue... */
370 			for (i = 0, saved = 0; inptr < inend; i++)
371 				saved = (saved << 8) | *inptr++;
372 		}
373 	}
374 
375 	*save = saved;
376 	*state = ((uulen & 0xff) << 8) | (i & 0xff);
377 
378 	return outptr - out;
379 }
380 
381 /**
382  * camel_uudecode_step:
383  * @in: (array length=inlen): input stream
384  * @inlen: max length of data to decode
385  * @out: (inout) (array): output stream
386  * @state: (inout): holds the number of bits that are stored in @save
387  * @save: (inout) (array length=state): leftover bits that have not yet been decoded
388  *
389  * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
390  * uuencoded data. Assumes the "begin mode filename" line has
391  * been stripped off.
392  *
393  * Returns: the number of bytes decoded
394  **/
395 gsize
camel_uudecode_step(guchar * in,gsize len,guchar * out,gint * state,guint32 * save)396 camel_uudecode_step (guchar *in,
397                      gsize len,
398                      guchar *out,
399                      gint *state,
400                      guint32 *save)
401 {
402 	register guchar *inptr, *outptr;
403 	guchar *inend, ch;
404 	register guint32 saved;
405 	gboolean last_was_eoln;
406 	gint uulen, i;
407 
408 	if (*state & CAMEL_UUDECODE_STATE_END)
409 		return 0;
410 
411 	saved = *save;
412 	i = *state & 0xff;
413 	uulen = (*state >> 8) & 0xff;
414 	if (uulen == 0)
415 		last_was_eoln = TRUE;
416 	else
417 		last_was_eoln = FALSE;
418 
419 	inend = in + len;
420 	outptr = out;
421 	inptr = in;
422 
423 	while (inptr < inend) {
424 		if (*inptr == '\n') {
425 			last_was_eoln = TRUE;
426 
427 			inptr++;
428 			continue;
429 		} else if (!uulen || last_was_eoln) {
430 			/* first octet on a line is the uulen octet */
431 			uulen = CAMEL_UUDECODE_CHAR (*inptr);
432 			last_was_eoln = FALSE;
433 			if (uulen == 0) {
434 				*state |= CAMEL_UUDECODE_STATE_END;
435 				break;
436 			}
437 
438 			inptr++;
439 			continue;
440 		}
441 
442 		ch = *inptr++;
443 
444 		if (uulen > 0) {
445 			/* save the byte */
446 			saved = (saved << 8) | ch;
447 			i++;
448 			if (i == 4) {
449 				/* convert 4 uuencoded bytes to 3 normal bytes */
450 				guchar b0, b1, b2, b3;
451 
452 				b0 = saved >> 24;
453 				b1 = saved >> 16 & 0xff;
454 				b2 = saved >> 8 & 0xff;
455 				b3 = saved & 0xff;
456 
457 				if (uulen >= 3) {
458 					*outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
459 					*outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
460 					*outptr++ = CAMEL_UUDECODE_CHAR (b2) << 6 | CAMEL_UUDECODE_CHAR (b3);
461 					uulen -= 3;
462 				} else {
463 					gint orig_uulen = uulen;
464 
465 					if (orig_uulen >= 1) {
466 						*outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
467 						uulen--;
468 					}
469 
470 					if (orig_uulen >= 2) {
471 						*outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
472 						uulen--;
473 					}
474 				}
475 
476 				i = 0;
477 				saved = 0;
478 			}
479 		} else {
480 			break;
481 		}
482 	}
483 
484 	*save = saved;
485 	*state = (*state & CAMEL_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
486 
487 	return outptr - out;
488 }
489 
490 /**
491  * camel_quoted_encode_close:
492  * @in: (array length=len): input stream
493  * @len: length of the input
494  * @out: (inout) (array): output string
495  * @state: (inout): holds the number of bits that are stored in @save
496  * @save: (inout) (array length=state): leftover bits that have not yet been encoded
497  *
498  * Quoted-printable encodes a block of text. Call this when finished
499  * encoding data with camel_quoted_encode_step() to flush off
500  * the last little bit.
501  *
502  * Returns: the number of bytes encoded
503  **/
504 gsize
camel_quoted_encode_close(guchar * in,gsize len,guchar * out,gint * state,gint * save)505 camel_quoted_encode_close (guchar *in,
506                            gsize len,
507                            guchar *out,
508                            gint *state,
509                            gint *save)
510 {
511 	register guchar *outptr = out;
512 	gint last;
513 
514 	if (len > 0)
515 		outptr += camel_quoted_encode_step (in, len, outptr, state, save);
516 
517 	last = *state;
518 	if (last != -1) {
519 		/* space/tab must be encoded if it's the last character on
520 		 * the line */
521 		if (camel_mime_is_qpsafe (last) && last != ' ' && last != 9) {
522 			*outptr++ = last;
523 		} else {
524 			*outptr++ = '=';
525 			*outptr++ = tohex[(last>>4) & 0xf];
526 			*outptr++ = tohex[last & 0xf];
527 		}
528 	}
529 
530 	*save = 0;
531 	*state = -1;
532 
533 	return outptr - out;
534 }
535 
536 /**
537  * camel_quoted_encode_step:
538  * @in: (array length=len): input stream
539  * @len: length of the input
540  * @out: (inout) (array): output string
541  * @state: (inout): holds the number of bits that are stored in @save
542  * @save: (inout) (array length=state): leftover bits that have not yet been encoded
543  *
544  * Quoted-printable encodes a block of text. Performs an 'encode
545  * step', saves left-over state in state and save (initialise to -1 on
546  * first invocation).
547  *
548  * Returns: the number of bytes encoded
549  **/
550 gsize
camel_quoted_encode_step(guchar * in,gsize len,guchar * out,gint * statep,gint * save)551 camel_quoted_encode_step (guchar *in,
552                           gsize len,
553                           guchar *out,
554                           gint *statep,
555                           gint *save)
556 {
557 	register guchar *inptr, *outptr, *inend;
558 	guchar c;
559 	register gint sofar = *save;  /* keeps track of how many chars on a line */
560 	register gint last = *statep; /* keeps track if last gchar to end was a space cr etc */
561 
562 	#define output_last() \
563 		if (sofar + 3 > 74) { \
564 			*outptr++ = '='; \
565 			*outptr++ = '\n'; \
566 			sofar = 0; \
567 		} \
568 		*outptr++ = '='; \
569 		*outptr++ = tohex[(last >> 4) & 0xf]; \
570 		*outptr++ = tohex[last & 0xf]; \
571 		sofar += 3;
572 
573 	inptr = in;
574 	inend = in + len;
575 	outptr = out;
576 	while (inptr < inend) {
577 		c = *inptr++;
578 		if (c == '\r') {
579 			if (last != -1) {
580 				output_last ();
581 			}
582 			last = c;
583 		} else if (c == '\n') {
584 			if (last != -1 && last != '\r') {
585 				output_last ();
586 			}
587 			*outptr++ = '\n';
588 			sofar = 0;
589 			last = -1;
590 		} else {
591 			if (last != -1) {
592 				if (camel_mime_is_qpsafe (last)) {
593 					*outptr++ = last;
594 					sofar++;
595 				} else {
596 					output_last ();
597 				}
598 			}
599 
600 			if (camel_mime_is_qpsafe (c)) {
601 				if (sofar > 74) {
602 					*outptr++ = '=';
603 					*outptr++ = '\n';
604 					sofar = 0;
605 				}
606 
607 				/* delay output of space gchar */
608 				if (c == ' ' || c == '\t') {
609 					last = c;
610 				} else {
611 					*outptr++ = c;
612 					sofar++;
613 					last = -1;
614 				}
615 			} else {
616 				if (sofar > 72) {
617 					*outptr++ = '=';
618 					*outptr++ = '\n';
619 					sofar = 3;
620 				} else
621 					sofar += 3;
622 
623 				*outptr++ = '=';
624 				*outptr++ = tohex[(c >> 4) & 0xf];
625 				*outptr++ = tohex[c & 0xf];
626 				last = -1;
627 			}
628 		}
629 	}
630 	*save = sofar;
631 	*statep = last;
632 
633 	#undef output_last
634 
635 	return (outptr - out);
636 }
637 
638 /*
639  * FIXME: this does not strip trailing spaces from lines (as it should, rfc 2045, section 6.7)
640  * Should it also canonicalise the end of line to CR LF??
641  *
642  * Note: Trailing rubbish (at the end of input), like = or =x or =\r will be lost.
643  */
644 
645 /**
646  * camel_quoted_decode_step:
647  * @in: (array length=len): input stream
648  * @len: max length of data to decode
649  * @out: (inout) (array): output stream
650  * @savestate: (inout): holds the number of bits that are stored in @saveme
651  * @saveme: (inout) (array length=savestate): leftover bits that have not yet been decoded
652  *
653  * Decodes a block of quoted-printable encoded data. Performs a
654  * 'decode step' on a chunk of QP encoded data.
655  *
656  * Returns: the number of bytes decoded
657  **/
658 gsize
camel_quoted_decode_step(guchar * in,gsize len,guchar * out,gint * savestate,gint * saveme)659 camel_quoted_decode_step (guchar *in,
660                           gsize len,
661                           guchar *out,
662                           gint *savestate,
663                           gint *saveme)
664 {
665 	register guchar *inptr, *outptr;
666 	guchar *inend, c;
667 	gint state, save;
668 
669 	inend = in + len;
670 	outptr = out;
671 
672 	d (printf ("quoted-printable, decoding text '%.*s'\n", len, in));
673 
674 	state = *savestate;
675 	save = *saveme;
676 	inptr = in;
677 	while (inptr < inend) {
678 		switch (state) {
679 		case 0:
680 			while (inptr < inend) {
681 				c = *inptr++;
682 				if (c == '=') {
683 					state = 1;
684 					break;
685 				}
686 #ifdef CANONICALISE_EOL
687 				/*else if (c=='\r') {
688 					state = 3;
689 				} else if (c == '\n') {
690 					*outptr++ = '\r';
691 					*outptr++ = c;
692 					} */
693 #endif
694 				else {
695 					*outptr++ = c;
696 				}
697 			}
698 			break;
699 		case 1:
700 			c = *inptr++;
701 			if (c == '\n') {
702 				/* soft break ... unix end of line */
703 				state = 0;
704 			} else {
705 				save = c;
706 				state = 2;
707 			}
708 			break;
709 		case 2:
710 			c = *inptr++;
711 			if (isxdigit (c) && isxdigit (save)) {
712 				c = toupper (c);
713 				save = toupper (save);
714 				*outptr++ = (((save>='A'?save-'A'+10:save-'0')&0x0f) << 4)
715 					| ((c >= 'A' ? c - 'A' + 10 : c - '0') &0x0f);
716 			} else if (c == '\n' && save == '\r') {
717 				/* soft break ... canonical end of line */
718 			} else {
719 				/* just output the data */
720 				*outptr++ = '=';
721 				*outptr++ = save;
722 				*outptr++ = c;
723 			}
724 			state = 0;
725 			break;
726 #ifdef CANONICALISE_EOL
727 		case 3:
728 			/* convert \r -> to \r\n, leaves \r\n alone */
729 			c = *inptr++;
730 			if (c == '\n') {
731 				*outptr++ = '\r';
732 				*outptr++ = c;
733 			} else {
734 				*outptr++ = '\r';
735 				*outptr++ = '\n';
736 				*outptr++ = c;
737 			}
738 			state = 0;
739 			break;
740 #endif
741 		}
742 	}
743 
744 	*savestate = state;
745 	*saveme = save;
746 
747 	return outptr - out;
748 }
749 
750 /*
751  * this is for the "Q" encoding of international words,
752  * which is slightly different than plain quoted-printable (mainly by allowing 0x20 <> _)
753 */
754 static gsize
quoted_decode(const guchar * in,gsize len,guchar * out)755 quoted_decode (const guchar *in,
756                gsize len,
757                guchar *out)
758 {
759 	register const guchar *inptr;
760 	register guchar *outptr;
761 	const guchar *inend;
762 	guchar c, c1;
763 	gint ret = 0;
764 
765 	inend = in + len;
766 	outptr = out;
767 
768 	d (printf ("decoding text '%.*s'\n", len, in));
769 
770 	inptr = in;
771 	while (inptr < inend) {
772 		c = *inptr++;
773 		if (c == '=') {
774 			/* silently ignore truncated data? */
775 			if (inend - in >= 2) {
776 				c = toupper (*inptr++);
777 				c1 = toupper (*inptr++);
778 				*outptr++ = (((c>='A'?c-'A'+10:c-'0')&0x0f) << 4)
779 					| ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') &0x0f);
780 			} else {
781 				ret = -1;
782 				break;
783 			}
784 		} else if (c == '_') {
785 			*outptr++ = 0x20;
786 		} else {
787 			*outptr++ = c;
788 		}
789 	}
790 	if (ret == 0) {
791 		return outptr - out;
792 	}
793 	return 0;
794 }
795 
796 /* rfc2047 version of quoted-printable */
797 /* safemask is the mask to apply to the camel_mime_special_table to determine what
798  * characters can safely be included without encoding */
799 static gsize
quoted_encode(const guchar * in,gsize len,guchar * out,gushort safemask)800 quoted_encode (const guchar *in,
801                gsize len,
802                guchar *out,
803                gushort safemask)
804 {
805 	register const guchar *inptr, *inend;
806 	guchar *outptr;
807 	guchar c;
808 
809 	inptr = in;
810 	inend = in + len;
811 	outptr = out;
812 	while (inptr < inend) {
813 		c = *inptr++;
814 		if (c == ' ') {
815 			*outptr++ = '_';
816 		} else if (camel_mime_special_table[c] & safemask) {
817 			*outptr++ = c;
818 		} else {
819 			*outptr++ = '=';
820 			*outptr++ = tohex[(c >> 4) & 0xf];
821 			*outptr++ = tohex[c & 0xf];
822 		}
823 	}
824 
825 	d (printf ("encoding '%.*s' = '%.*s'\n", len, in, outptr - out, out));
826 
827 	return (outptr - out);
828 }
829 
830 static void
header_decode_lwsp(const gchar ** in)831 header_decode_lwsp (const gchar **in)
832 {
833 	const gchar *inptr = *in;
834 	gchar c;
835 
836 	d2 (printf ("is ws: '%s'\n", *in));
837 
838 	while ((camel_mime_is_lwsp (*inptr) || *inptr =='(') && *inptr != '\0') {
839 		while (camel_mime_is_lwsp (*inptr) && *inptr != '\0') {
840 			d2 (printf ("(%c)", *inptr));
841 			inptr++;
842 		}
843 		d2 (printf ("\n"));
844 
845 		/* check for comments */
846 		if (*inptr == '(') {
847 			gint depth = 1;
848 			inptr++;
849 			while (depth && (c=*inptr) && *inptr != '\0') {
850 				if (c == '\\' && inptr[1]) {
851 					inptr++;
852 				} else if (c == '(') {
853 					depth++;
854 				} else if (c == ')') {
855 					depth--;
856 				}
857 				inptr++;
858 			}
859 		}
860 	}
861 	*in = inptr;
862 }
863 
/* Converts the first @n bytes of @string with the conversion descriptor @cd.
 *
 * When @cd is (GIConv) -1 no conversion is done and a g_strndup() copy is
 * returned. Otherwise the result is a newly allocated buffer terminated by
 * four nul bytes; the output buffer is grown whenever g_iconv() reports
 * E2BIG, and an incomplete trailing sequence (EINVAL) is ignored. On any
 * other conversion error the buffer is freed and NULL is returned with
 * errno set to the conversion error. @cd is reset before returning. */
static gchar *
camel_iconv_strndup (GIConv cd,
                     const gchar *string,
                     gsize n)
{
	gsize inleft, outleft, converted = 0;
	gchar *out, *outbuf;
	const gchar *inbuf;
	gsize outlen;
	gint errnosav;

	if (cd == (GIConv) -1)
		return g_strndup (string, n);

	/* initial guess; 4 extra bytes are always reserved for the terminator */
	outlen = n * 2 + 16;
	out = g_malloc (outlen + 4);

	inbuf = string;
	inleft = n;

	do {
		/* cleared so that the E2BIG tests below only see this iteration's error */
		errno = 0;
		outbuf = out + converted;
		outleft = outlen - converted;

		converted = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
		if (converted == (gsize) -1) {
			if (errno != E2BIG && errno != EINVAL)
				goto fail;
		}

		/*
		 * E2BIG   There is not sufficient room at *outbuf.
		 *
		 * We just need to grow our outbuffer and try again.
		 */

		/* from here on 'converted' is the number of output bytes produced so far */
		converted = outbuf - out;
		if (errno == E2BIG) {
			outlen += inleft * 2 + 16;
			out = g_realloc (out, outlen + 4);
			outbuf = out + converted;
		}
	} while (errno == E2BIG && inleft > 0);

	/*
	 * EINVAL  An incomplete multibyte sequence has been encountered
	 *         in the input.
	 *
	 * We'll just have to ignore it...
	 */

	/* flush the iconv conversion */
	while (g_iconv (cd, NULL, NULL, &outbuf, &outleft) == (gsize) -1) {
		if (errno != E2BIG)
			break;

		/* not enough room for the flushed bytes: grow and retry */
		outlen += 16;
		converted = outbuf - out;
		out = g_realloc (out, outlen + 4);
		outleft = outlen - converted;
		outbuf = out + converted;
	}

	/* Note: not all charsets can be nul-terminated with a single
	 * nul byte. UCS2, for example, needs 2 nul bytes and UCS4
	 * needs 4. I hope that 4 nul bytes is enough to terminate all
	 * multibyte charsets? */

	/* nul-terminate the string */
	memset (outbuf, 0, 4);

	/* reset the cd */
	g_iconv (cd, NULL, NULL, NULL, NULL);

	return out;

 fail:

	/* keep the conversion error across the cleanup calls below */
	errnosav = errno;

	w (g_warning ("camel_iconv_strndup: %s at byte %lu", g_strerror (errno), n - inleft));

	g_free (out);

	/* reset the cd */
	g_iconv (cd, NULL, NULL, NULL, NULL);

	errno = errnosav;

	return NULL;
}
956 
957 #define is_ascii(c) isascii ((gint) ((guchar) (c)))
958 
959 static gchar *
decode_8bit(const gchar * text,gsize len,const gchar * default_charset)960 decode_8bit (const gchar *text,
961              gsize len,
962              const gchar *default_charset)
963 {
964 	const gchar *charsets[4] = { "UTF-8", NULL, NULL, NULL };
965 	gsize inleft, outleft, outlen, rc, min, n;
966 	const gchar *locale_charset, *best;
967 	gchar *out, *outbuf;
968 	const gchar *inbuf;
969 	GIConv cd;
970 	gint i = 1;
971 
972 	if (default_charset && g_ascii_strcasecmp (default_charset, "UTF-8") != 0)
973 		charsets[i++] = default_charset;
974 
975 	locale_charset = camel_iconv_locale_charset ();
976 	if (locale_charset && g_ascii_strcasecmp (locale_charset, "UTF-8") != 0)
977 		charsets[i++] = locale_charset;
978 
979 	min = len;
980 	best = charsets[0];
981 
982 	outlen = (len * 2) + 16;
983 	out = g_malloc (outlen + 1);
984 
985 	for (i = 0; charsets[i]; i++) {
986 		if ((cd = camel_iconv_open ("UTF-8", charsets[i])) == (GIConv) -1)
987 			continue;
988 
989 		outleft = outlen;
990 		outbuf = out;
991 		inleft = len;
992 		inbuf = text;
993 		n = 0;
994 
995 		do {
996 			rc = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
997 			if (rc == (gsize) -1) {
998 				if (errno == EINVAL) {
999 					/* incomplete sequence at the end of the input buffer */
1000 					n += inleft;
1001 					break;
1002 				}
1003 
1004 				if (errno == E2BIG) {
1005 					outlen += (inleft * 2) + 16;
1006 					rc = (gsize) (outbuf - out);
1007 					out = g_realloc (out, outlen + 1);
1008 					outleft = outlen - rc;
1009 					outbuf = out + rc;
1010 				} else {
1011 					inleft--;
1012 					inbuf++;
1013 					n++;
1014 				}
1015 			}
1016 		} while (inleft > 0);
1017 
1018 		while ((rc = g_iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1019 			if (errno != E2BIG)
1020 				break;
1021 
1022 			outlen += 16;
1023 			rc = (gsize) (outbuf - out);
1024 			out = g_realloc (out, outlen + 1);
1025 			outleft = outlen - rc;
1026 			outbuf = out + rc;
1027 		}
1028 
1029 		*outbuf = '\0';
1030 
1031 		camel_iconv_close (cd);
1032 
1033 		if (rc != (gsize) -1 && n == 0)
1034 			return out;
1035 
1036 		if (n < min) {
1037 			best = charsets[i];
1038 			min = n;
1039 		}
1040 	}
1041 
1042 	/* if we get here, then none of the charsets fit the 8bit text flawlessly...
1043 	 * try to find the one that fit the best and use that to convert what we can,
1044 	 * replacing any byte we can't convert with a '?' */
1045 
1046 	if ((cd = camel_iconv_open ("UTF-8", best)) == (GIConv) -1) {
1047 		/* this shouldn't happen... but if we are here, then
1048 		 * it did...  the only thing we can do at this point
1049 		 * is replace the 8bit garbage and pray */
1050 		register const gchar *inptr = text;
1051 		const gchar *inend = inptr + len;
1052 
1053 		outbuf = out;
1054 
1055 		while (inptr < inend) {
1056 			if (is_ascii (*inptr))
1057 				*outbuf++ = *inptr++;
1058 			else
1059 				*outbuf++ = '?';
1060 		}
1061 
1062 		*outbuf = '\0';
1063 
1064 		return out;
1065 	}
1066 
1067 	outleft = outlen;
1068 	outbuf = out;
1069 	inleft = len;
1070 	inbuf = text;
1071 
1072 	do {
1073 		rc = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
1074 		if (rc == (gsize) -1) {
1075 			if (errno == EINVAL) {
1076 				/* incomplete sequence at the end of the input buffer */
1077 				break;
1078 			}
1079 
1080 			if (errno == E2BIG) {
1081 				rc = outbuf - out;
1082 				outlen += inleft * 2 + 16;
1083 				out = g_realloc (out, outlen + 1);
1084 				outleft = outlen - rc;
1085 				outbuf = out + rc;
1086 			} else {
1087 				*outbuf++ = '?';
1088 				outleft--;
1089 				inleft--;
1090 				inbuf++;
1091 			}
1092 		}
1093 	} while (inleft > 0);
1094 
1095 	while ((rc = g_iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1096 		if (errno != E2BIG)
1097 			break;
1098 
1099 		outlen += 16;
1100 		rc = (gsize) (outbuf - out);
1101 		out = g_realloc (out, outlen + 1);
1102 		outleft = outlen - rc;
1103 		outbuf = out + rc;
1104 	}
1105 
1106 	*outbuf = '\0';
1107 
1108 	camel_iconv_close (cd);
1109 
1110 	return out;
1111 }
1112 
1113 #define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
1114 
1115 static void
make_string_utf8_valid(gchar * text,gsize textlen)1116 make_string_utf8_valid (gchar *text,
1117                         gsize textlen)
1118 {
1119 	gchar *p;
1120 	gsize len;
1121 
1122 	p = text;
1123 	len = textlen;
1124 
1125 	while (!g_utf8_validate (p, len, (const gchar **) &p)) {
1126 		len = textlen - (p - text);
1127 		*p = '?';
1128 	}
1129 }
1130 
1131 static void
sanitize_decoded_text(guchar * text,gssize * inout_textlen)1132 sanitize_decoded_text (guchar *text,
1133 		       gssize *inout_textlen)
1134 {
1135 	gssize ii, jj, textlen;
1136 
1137 	g_return_if_fail (text != NULL);
1138 	g_return_if_fail (inout_textlen != NULL);
1139 
1140 	textlen = *inout_textlen;
1141 
1142 	for (ii = 0, jj = 0; ii < textlen; ii++) {
1143 		/* Skip '\0' and '\r' characters */
1144 		if (text[ii] == 0 || text[ii] == '\r')
1145 			continue;
1146 
1147 		/* Change '\n' into space */
1148 		if (text[ii] == '\n')
1149 			text[ii] = ' ';
1150 
1151 		if (ii != jj)
1152 			text[jj] = text[ii];
1153 
1154 		jj++;
1155 	}
1156 
1157 	*inout_textlen = jj;
1158 }
1159 
1160 /* decode an rfc2047 encoded-word token */
1161 static gchar *
rfc2047_decode_word(const gchar * in,gsize inlen,const gchar * default_charset)1162 rfc2047_decode_word (const gchar *in,
1163                      gsize inlen,
1164                      const gchar *default_charset)
1165 {
1166 	const guchar *instart = (const guchar *) in;
1167 	const guchar *inptr = instart + 2;
1168 	const guchar *inend = instart + inlen - 2;
1169 	guchar *decoded;
1170 	const gchar *charset;
1171 	gchar *charenc, *p;
1172 	guint32 save = 0;
1173 	gssize declen;
1174 	gint state = 0;
1175 	gsize len;
1176 	GIConv cd;
1177 	gchar *buf;
1178 
1179 	/* skip over the charset */
1180 	if (inlen < 8 || !(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
1181 		return NULL;
1182 
1183 	inptr++;
1184 
1185 	switch (*inptr) {
1186 	case 'B':
1187 	case 'b':
1188 		inptr += 2;
1189 		decoded = g_alloca (((inend - inptr) * 3 / 4) + 3);
1190 		declen = g_base64_decode_step ((gchar *) inptr, inend - inptr, decoded, &state, &save);
1191 		break;
1192 	case 'Q':
1193 	case 'q':
1194 		inptr += 2;
1195 		decoded = g_alloca (inend - inptr);
1196 		declen = quoted_decode (inptr, inend - inptr, decoded);
1197 
1198 		if (declen == -1) {
1199 			d (fprintf (stderr, "encountered broken 'Q' encoding\n"));
1200 			return NULL;
1201 		}
1202 		break;
1203 	default:
1204 		d (fprintf (stderr, "unknown encoding\n"));
1205 		return NULL;
1206 	}
1207 
1208 	sanitize_decoded_text (decoded, &declen);
1209 
1210 	/* never return empty string, return rather NULL */
1211 	if (!declen)
1212 		return NULL;
1213 
1214 	len = (inptr - 3) - (instart + 2);
1215 	charenc = g_alloca (len + 1);
1216 	memcpy (charenc, in + 2, len);
1217 	charenc[len] = '\0';
1218 	charset = charenc;
1219 
1220 	/* rfc2231 updates rfc2047 encoded words...
1221 	 * The ABNF given in RFC 2047 for encoded-words is:
1222 	 *   encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1223 	 * This specification changes this ABNF to:
1224 	 *   encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1225 	 */
1226 
1227 	/* trim off the 'language' part if it's there... */
1228 	if ((p = strchr (charset, '*')))
1229 		*p = '\0';
1230 
1231 	/* slight optimization? */
1232 	if (!g_ascii_strcasecmp (charset, "UTF-8"))
1233 		return g_strndup ((gchar *) decoded, declen);
1234 
1235 	if (charset[0])
1236 		charset = camel_iconv_charset_name (charset);
1237 
1238 	if (!charset[0] || (cd = camel_iconv_open ("UTF-8", charset)) == (GIConv) -1) {
1239 		w (g_warning (
1240 			"Cannot convert from %s to UTF-8, "
1241 			"header display may be corrupt: %s",
1242 			charset[0] ? charset : "unspecified charset",
1243 			g_strerror (errno)));
1244 
1245 		return decode_8bit ((gchar *) decoded, declen, default_charset);
1246 	}
1247 
1248 	buf = camel_iconv_strndup (cd, (gchar *) decoded, declen);
1249 	camel_iconv_close (cd);
1250 
1251 	if (buf != NULL)
1252 		return buf;
1253 
1254 	w (g_warning (
1255 		"Failed to convert \"%.*s\" to UTF-8, display may be "
1256 		"corrupt: %s", declen, decoded, g_strerror (errno)));
1257 
1258 	return decode_8bit ((gchar *) decoded, declen, charset);
1259 }
1260 
1261 /* ok, a lot of mailers are BROKEN, and send iso-latin1 encoded
1262  * headers, when they should just be sticking to US-ASCII
1263  * according to the rfc's.  Anyway, since the conversion to utf-8
1264  * is trivial, just do it here without iconv */
1265 static GString *
append_latin1(GString * out,const gchar * in,gsize len)1266 append_latin1 (GString *out,
1267                const gchar *in,
1268                gsize len)
1269 {
1270 	guint c;
1271 
1272 	while (len) {
1273 		c = (guint) * in++;
1274 		len--;
1275 		if (c & 0x80) {
1276 			g_string_append_c (out, 0xc0 | ((c >> 6) & 0x3));  /* 110000xx */
1277 			g_string_append_c (out, 0x80 | (c & 0x3f));        /* 10xxxxxx */
1278 		} else {
1279 			g_string_append_c (out, c);
1280 		}
1281 	}
1282 	return out;
1283 }
1284 
1285 static gint
append_8bit(GString * out,const gchar * inbuf,gsize inlen,const gchar * charset)1286 append_8bit (GString *out,
1287              const gchar *inbuf,
1288              gsize inlen,
1289              const gchar *charset)
1290 {
1291 	gchar *outbase, *outbuf;
1292 	gsize outlen;
1293 	GIConv ic;
1294 
1295 	ic = camel_iconv_open ("UTF-8", charset);
1296 	if (ic == (GIConv) -1)
1297 		return FALSE;
1298 
1299 	outlen = inlen * 6 + 16;
1300 	outbuf = outbase = g_malloc (outlen);
1301 
1302 	if (camel_iconv (ic, &inbuf, &inlen, &outbuf, &outlen) == (gsize) -1) {
1303 		w (g_warning ("Conversion to '%s' failed: %s", charset, g_strerror (errno)));
1304 		g_free (outbase);
1305 		camel_iconv_close (ic);
1306 		return FALSE;
1307 	}
1308 
1309 	camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
1310 
1311 	*outbuf = 0;
1312 	g_string_append (out, outbase);
1313 	g_free (outbase);
1314 	camel_iconv_close (ic);
1315 
1316 	return TRUE;
1317 
1318 }
1319 
1320 static GString *
append_quoted_pair(GString * str,const gchar * in,gsize inlen)1321 append_quoted_pair (GString *str,
1322                     const gchar *in,
1323                     gsize inlen)
1324 {
1325 	register const gchar *inptr = in;
1326 	const gchar *inend = in + inlen;
1327 	gchar c;
1328 
1329 	while (inptr < inend) {
1330 		c = *inptr++;
1331 		if (c == '\\' && inptr < inend)
1332 			g_string_append_c (str, *inptr++);
1333 		else
1334 			g_string_append_c (str, c);
1335 	}
1336 
1337 	return str;
1338 }
1339 
1340 /* decodes a simple text, rfc822 + rfc2047 */
1341 static gchar *
header_decode_text(const gchar * in,gint ctext,const gchar * default_charset)1342 header_decode_text (const gchar *in,
1343                     gint ctext,
1344                     const gchar *default_charset)
1345 {
1346 	register const gchar *inptr = in;
1347 	gboolean encoded = FALSE;
1348 	const gchar *lwsp, *text;
1349 	gsize nlwsp, n;
1350 	gboolean ascii;
1351 	gchar *decoded;
1352 	GString *out;
1353 
1354 	if (in == NULL)
1355 		return g_strdup ("");
1356 
1357 	out = g_string_sized_new (strlen (in) + 1);
1358 
1359 	while (*inptr != '\0') {
1360 		lwsp = inptr;
1361 		while (camel_mime_is_lwsp (*inptr))
1362 			inptr++;
1363 
1364 		nlwsp = (gsize) (inptr - lwsp);
1365 
1366 		if (*inptr != '\0') {
1367 			text = inptr;
1368 			ascii = TRUE;
1369 
1370 			if (!strncmp (inptr, "=?", 2)) {
1371 				inptr += 2;
1372 
1373 				/* skip past the charset (if one is even declared, sigh) */
1374 				while (*inptr && *inptr != '?') {
1375 					ascii = ascii && is_ascii (*inptr);
1376 					inptr++;
1377 				}
1378 
1379 				/* sanity check encoding type */
1380 				if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
1381 					goto non_rfc2047;
1382 
1383 				inptr += 3;
1384 
1385 				/* find the end of the rfc2047 encoded word token */
1386 				while (*inptr && strncmp (inptr, "?=", 2) != 0) {
1387 					ascii = ascii && is_ascii (*inptr);
1388 					inptr++;
1389 				}
1390 
1391 				if (!strncmp (inptr, "?=", 2))
1392 					inptr += 2;
1393 			} else {
1394 			non_rfc2047:
1395 				/* stop if we encounter a possible rfc2047 encoded
1396 				 * token even if it's inside another word, sigh. */
1397 				while (*inptr && !camel_mime_is_lwsp (*inptr) &&
1398 				       strncmp (inptr, "=?", 2) != 0) {
1399 					ascii = ascii && is_ascii (*inptr);
1400 					inptr++;
1401 				}
1402 			}
1403 
1404 			n = (gsize) (inptr - text);
1405 			if (is_rfc2047_encoded_word (text, n)) {
1406 				if ((decoded = rfc2047_decode_word (text, n, default_charset))) {
1407 					/* rfc2047 states that you must ignore all
1408 					 * whitespace between encoded words */
1409 					if (!encoded)
1410 						g_string_append_len (out, lwsp, nlwsp);
1411 
1412 					g_string_append (out, decoded);
1413 					g_free (decoded);
1414 
1415 					encoded = TRUE;
1416 				} else {
1417 					/* append lwsp and invalid rfc2047 encoded-word token */
1418 					g_string_append_len (out, lwsp, nlwsp + n);
1419 					encoded = FALSE;
1420 				}
1421 			} else {
1422 				/* append lwsp */
1423 				g_string_append_len (out, lwsp, nlwsp);
1424 
1425 				/* append word token */
1426 				if (!ascii) {
1427 					/* *sigh* I hate broken mailers... */
1428 					decoded = decode_8bit (text, n, default_charset);
1429 					n = strlen (decoded);
1430 					text = decoded;
1431 				} else {
1432 					decoded = NULL;
1433 				}
1434 
1435 				if (!ctext)
1436 					g_string_append_len (out, text, n);
1437 				else
1438 					append_quoted_pair (out, text, n);
1439 
1440 				g_free (decoded);
1441 
1442 				encoded = FALSE;
1443 			}
1444 		} else {
1445 			/* appending trailing lwsp */
1446 			g_string_append_len (out, lwsp, nlwsp);
1447 			break;
1448 		}
1449 	}
1450 
1451 	return g_string_free (out, FALSE);
1452 }
1453 
1454 /**
1455  * camel_header_decode_string:
1456  * @in: input header value string
1457  * @default_charset: default charset to use if improperly encoded
1458  *
1459  * Decodes rfc2047 encoded-word tokens
1460  *
1461  * Returns: a string containing the UTF-8 version of the decoded header
1462  * value
1463  **/
1464 gchar *
camel_header_decode_string(const gchar * in,const gchar * default_charset)1465 camel_header_decode_string (const gchar *in,
1466                             const gchar *default_charset)
1467 {
1468 	gchar *res;
1469 
1470 	if (in == NULL)
1471 		return NULL;
1472 
1473 	res = header_decode_text (in, FALSE, default_charset);
1474 
1475 	if (res)
1476 		make_string_utf8_valid (res, strlen (res));
1477 
1478 	return res;
1479 }
1480 
1481 /**
1482  * camel_header_format_ctext:
1483  * @in: input header value string
1484  * @default_charset: default charset to use if improperly encoded
1485  *
1486  * Decodes a header which contains rfc2047 encoded-word tokens that
1487  * may or may not be within a comment.
1488  *
1489  * Returns: a string containing the UTF-8 version of the decoded header
1490  * value
1491  **/
1492 gchar *
camel_header_format_ctext(const gchar * in,const gchar * default_charset)1493 camel_header_format_ctext (const gchar *in,
1494                            const gchar *default_charset)
1495 {
1496 	if (in == NULL)
1497 		return NULL;
1498 
1499 	return header_decode_text (in, TRUE, default_charset);
1500 }
1501 
1502 /* how long a sequence of pre-encoded words should be less than, to attempt to
1503  * fit into a properly folded word.  Only a guide. */
1504 #define CAMEL_FOLD_PREENCODED (24)
1505 
1506 /* FIXME: needs a way to cache iconv opens for different charsets? */
1507 static void
rfc2047_encode_word(GString * outstring,const gchar * in,gsize len,const gchar * type,gushort safemask)1508 rfc2047_encode_word (GString *outstring,
1509                      const gchar *in,
1510                      gsize len,
1511                      const gchar *type,
1512                      gushort safemask)
1513 {
1514 	GIConv ic = (GIConv) -1;
1515 	gchar *buffer, *out, *ascii;
1516 	gsize inlen, outlen, enclen, bufflen;
1517 	const gchar *inptr, *p;
1518 	gint first = 1;
1519 
1520 	d (printf ("Converting [%d] '%.*s' to %s\n", len, len, in, type));
1521 
1522 	/* convert utf8->encoding */
1523 	bufflen = len * 6 + 16;
1524 	buffer = g_alloca (bufflen);
1525 	inlen = len;
1526 	inptr = in;
1527 
1528 	ascii = g_alloca (bufflen);
1529 
1530 	if (g_ascii_strcasecmp (type, "UTF-8") != 0)
1531 		ic = camel_iconv_open (type, "UTF-8");
1532 
1533 	while (inlen) {
1534 		gssize convlen, proclen;
1535 		gint i;
1536 
1537 		/* break up words into smaller bits, what we really want is encoded + overhead < 75,
1538 		 * but we'll just guess what that means in terms of input chars, and assume its good enough */
1539 
1540 		out = buffer;
1541 		outlen = bufflen;
1542 
1543 		if (ic == (GIConv) -1) {
1544 			/* native encoding case, the easy one (?) */
1545 			/* we work out how much we can convert, and still be in length */
1546 			/* proclen will be the result of input characters that we can convert, to the nearest
1547 			 * (approximated) valid utf8 gchar */
1548 			convlen = 0;
1549 			proclen = -1;
1550 			p = inptr;
1551 			i = 0;
1552 			while (p < (in + len) && convlen < (75 - strlen ("=?utf-8?q?\?="))) {
1553 				guchar c = *p++;
1554 
1555 				if (c >= 0xc0)
1556 					proclen = i;
1557 				i++;
1558 				if (c < 0x80)
1559 					proclen = i;
1560 				if (camel_mime_special_table[c] & safemask)
1561 					convlen += 1;
1562 				else
1563 					convlen += 3;
1564 			}
1565 
1566 			if (proclen >= 0 && proclen < i && convlen < (75 - strlen ("=?utf-8?q?\?=")))
1567 				proclen = i;
1568 
1569 			/* well, we probably have broken utf8, just copy it anyway what the heck */
1570 			if (proclen == -1) {
1571 				w (g_warning ("Appear to have truncated utf8 sequence"));
1572 				proclen = inlen;
1573 			}
1574 
1575 			memcpy (out, inptr, proclen);
1576 			inptr += proclen;
1577 			inlen -= proclen;
1578 			out += proclen;
1579 		} else {
1580 			/* well we could do similar, but we can't (without undue effort), we'll just break it up into
1581 			 * hopefully-small-enough chunks, and leave it at that */
1582 			convlen = MIN (inlen, CAMEL_FOLD_PREENCODED);
1583 			p = inptr;
1584 			if (camel_iconv (ic, &inptr, (gsize *) &convlen, &out, &outlen) == (gsize) -1 && errno != EINVAL) {
1585 				w (g_warning ("Conversion problem: conversion truncated: %s", g_strerror (errno)));
1586 				/* blah, we include it anyway, better than infinite loop ... */
1587 				inptr += convlen;
1588 			} else {
1589 				/* make sure we flush out any shift state */
1590 				camel_iconv (ic, NULL, NULL, &out, &outlen);
1591 			}
1592 			inlen -= (inptr - p);
1593 		}
1594 
1595 		enclen = out - buffer;
1596 
1597 		if (enclen) {
1598 			/* create token */
1599 			out = ascii;
1600 			if (first)
1601 				first = 0;
1602 			else
1603 				*out++ = ' ';
1604 			out += sprintf (out, "=?%s?Q?", type);
1605 			out += quoted_encode ((guchar *) buffer, enclen, (guchar *) out, safemask);
1606 			sprintf (out, "?=");
1607 
1608 			d (printf ("converted part = %s\n", ascii));
1609 
1610 			g_string_append (outstring, ascii);
1611 		}
1612 	}
1613 
1614 	if (ic != (GIConv) -1)
1615 		camel_iconv_close (ic);
1616 }
1617 
1618 static gchar *
header_encode_string_rfc2047(const guchar * in,gboolean include_lwsp)1619 header_encode_string_rfc2047 (const guchar *in,
1620                               gboolean include_lwsp)
1621 {
1622 	const guchar *inptr = in, *start, *word;
1623 	gboolean last_was_encoded = FALSE;
1624 	gboolean last_was_space = FALSE;
1625 	const gchar *charset;
1626 	gint encoding;
1627 	GString *out;
1628 
1629 	g_return_val_if_fail (g_utf8_validate ((const gchar *) in, -1, NULL), NULL);
1630 
1631 	if (in == NULL)
1632 		return NULL;
1633 
1634 	/* do a quick us-ascii check (the common case?) */
1635 	while (*inptr) {
1636 		if (*inptr > 127)
1637 			break;
1638 		inptr++;
1639 	}
1640 	if (*inptr == '\0')
1641 		return g_strdup ((gchar *) in);
1642 
1643 	/* This gets each word out of the input, and checks to see what charset
1644 	 * can be used to encode it. */
1645 	/* TODO: Work out when to merge subsequent words, or across word-parts */
1646 	out = g_string_new ("");
1647 	inptr = in;
1648 	encoding = 0;
1649 	word = NULL;
1650 	start = inptr;
1651 	while (inptr && *inptr) {
1652 		gunichar c;
1653 		const gchar *newinptr;
1654 
1655 		newinptr = g_utf8_next_char (inptr);
1656 		c = g_utf8_get_char ((gchar *) inptr);
1657 		if (newinptr == NULL || !g_unichar_validate (c)) {
1658 			w (g_warning (
1659 				"Invalid UTF-8 sequence encountered "
1660 				"(pos %d, gchar '%c'): %s",
1661 				(inptr - in), inptr[0], in));
1662 			inptr++;
1663 			continue;
1664 		}
1665 
1666 		if (c < 256 && !include_lwsp && camel_mime_is_lwsp (c) && !last_was_space) {
1667 			/* we've reached the end of a 'word' */
1668 			if (word && !(last_was_encoded && encoding)) {
1669 				/* output lwsp between non-encoded words */
1670 				g_string_append_len (out, (const gchar *) start, word - start);
1671 				start = word;
1672 			}
1673 
1674 			switch (encoding) {
1675 			case 0:
1676 				g_string_append_len (out, (const gchar *) start, inptr - start);
1677 				last_was_encoded = FALSE;
1678 				break;
1679 			case 1:
1680 				if (last_was_encoded)
1681 					g_string_append_c (out, ' ');
1682 
1683 				rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1684 				last_was_encoded = TRUE;
1685 				break;
1686 			case 2:
1687 				if (last_was_encoded)
1688 					g_string_append_c (out, ' ');
1689 
1690 				if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1691 					charset = "UTF-8";
1692 				rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1693 				last_was_encoded = TRUE;
1694 				break;
1695 			}
1696 
1697 			last_was_space = TRUE;
1698 			start = inptr;
1699 			word = NULL;
1700 			encoding = 0;
1701 		} else if (c > 127 && c < 256) {
1702 			encoding = MAX (encoding, 1);
1703 			last_was_space = FALSE;
1704 		} else if (c >= 256) {
1705 			encoding = MAX (encoding, 2);
1706 			last_was_space = FALSE;
1707 		} else if (include_lwsp || !camel_mime_is_lwsp (c)) {
1708 			last_was_space = FALSE;
1709 		}
1710 
1711 		if (!(c < 256 && !include_lwsp && camel_mime_is_lwsp (c)) && !word)
1712 			word = inptr;
1713 
1714 		inptr = (const guchar *) newinptr;
1715 	}
1716 
1717 	if (inptr - start) {
1718 		if (word && !(last_was_encoded && encoding)) {
1719 			g_string_append_len (out, (const gchar *) start, word - start);
1720 			start = word;
1721 		}
1722 
1723 		switch (encoding) {
1724 		case 0:
1725 			g_string_append_len (out, (const gchar *) start, inptr - start);
1726 			break;
1727 		case 1:
1728 			if (last_was_encoded)
1729 				g_string_append_c (out, ' ');
1730 
1731 			rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1732 			break;
1733 		case 2:
1734 			if (last_was_encoded)
1735 				g_string_append_c (out, ' ');
1736 
1737 			if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1738 				charset = "UTF-8";
1739 			rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1740 			break;
1741 		}
1742 	}
1743 
1744 	return g_string_free (out, FALSE);
1745 }
1746 
1747 /* TODO: Should this worry about quotes?? */
1748 /**
1749  * camel_header_encode_string:
1750  * @in: input string
1751  *
1752  * Encodes a 'text' header according to the rules of rfc2047.
1753  *
1754  * Returns: the rfc2047 encoded header
1755  **/
1756 gchar *
camel_header_encode_string(const guchar * in)1757 camel_header_encode_string (const guchar *in)
1758 {
1759 	return header_encode_string_rfc2047 (in, FALSE);
1760 }
1761 
1762 /* apply quoted-string rules to a string */
1763 static void
quote_word(GString * out,gboolean do_quotes,const gchar * start,gsize len)1764 quote_word (GString *out,
1765             gboolean do_quotes,
1766             const gchar *start,
1767             gsize len)
1768 {
1769 	gint i, c;
1770 
1771 	/* TODO: What about folding on long lines? */
1772 	if (do_quotes)
1773 		g_string_append_c (out, '"');
1774 	for (i = 0; i < len; i++) {
1775 		c = *start++;
1776 		if (c == '\"' || c == '\\' || c == '\r')
1777 			g_string_append_c (out, '\\');
1778 		g_string_append_c (out, c);
1779 	}
1780 	if (do_quotes)
1781 		g_string_append_c (out, '"');
1782 }
1783 
1784 /* incrementing possibility for the word type */
1785 enum _phrase_word_t {
1786 	WORD_ATOM,
1787 	WORD_QSTRING,
1788 	WORD_2047
1789 };
1790 
1791 struct _phrase_word {
1792 	const guchar *start, *end;
1793 	enum _phrase_word_t type;
1794 	gint encoding;
1795 };
1796 
1797 static gboolean
word_types_compatable(enum _phrase_word_t type1,enum _phrase_word_t type2)1798 word_types_compatable (enum _phrase_word_t type1,
1799                        enum _phrase_word_t type2)
1800 {
1801 	switch (type1) {
1802 	case WORD_ATOM:
1803 		return type2 == WORD_QSTRING;
1804 	case WORD_QSTRING:
1805 		return type2 != WORD_2047;
1806 	case WORD_2047:
1807 		return type2 == WORD_2047;
1808 	default:
1809 		return FALSE;
1810 	}
1811 }
1812 
1813 /* split the input into words with info about each word
1814  * merge common word types clean up */
1815 static GList *
header_encode_phrase_get_words(const guchar * in)1816 header_encode_phrase_get_words (const guchar *in)
1817 {
1818 	const guchar *inptr = in, *start, *last;
1819 	struct _phrase_word *word;
1820 	enum _phrase_word_t type;
1821 	gint encoding, count = 0;
1822 	GList *words = NULL;
1823 
1824 	/* break the input into words */
1825 	type = WORD_ATOM;
1826 	last = inptr;
1827 	start = inptr;
1828 	encoding = 0;
1829 	while (inptr && *inptr) {
1830 		gunichar c;
1831 		const gchar *newinptr;
1832 
1833 		newinptr = g_utf8_next_char (inptr);
1834 		c = g_utf8_get_char ((gchar *) inptr);
1835 
1836 		if (!g_unichar_validate (c)) {
1837 			w (g_warning (
1838 				"Invalid UTF-8 sequence encountered "
1839 				"(pos %d, gchar '%c'): %s",
1840 				(inptr - in), inptr[0], in));
1841 			inptr++;
1842 			continue;
1843 		}
1844 
1845 		inptr = (const guchar *) newinptr;
1846 		if (g_unichar_isspace (c)) {
1847 			if (count > 0) {
1848 				word = g_new0 (struct _phrase_word, 1);
1849 				word->start = start;
1850 				word->end = last;
1851 				word->type = type;
1852 				word->encoding = encoding;
1853 				words = g_list_append (words, word);
1854 				count = 0;
1855 			}
1856 
1857 			start = inptr;
1858 			type = WORD_ATOM;
1859 			encoding = 0;
1860 		} else {
1861 			count++;
1862 			if (c < 128) {
1863 				if (!camel_mime_is_atom (c))
1864 					type = MAX (type, WORD_QSTRING);
1865 			} else if (c > 127 && c < 256) {
1866 				type = WORD_2047;
1867 				encoding = MAX (encoding, 1);
1868 			} else if (c >= 256) {
1869 				type = WORD_2047;
1870 				encoding = MAX (encoding, 2);
1871 			}
1872 		}
1873 
1874 		last = inptr;
1875 	}
1876 
1877 	if (count > 0) {
1878 		word = g_new0 (struct _phrase_word, 1);
1879 		word->start = start;
1880 		word->end = last;
1881 		word->type = type;
1882 		word->encoding = encoding;
1883 		words = g_list_append (words, word);
1884 	}
1885 
1886 	return words;
1887 }
1888 
1889 #define MERGED_WORD_LT_FOLDLEN(wordlen, type) ((type) == WORD_2047 ? (wordlen) < CAMEL_FOLD_PREENCODED : (wordlen) < (CAMEL_FOLD_SIZE - 8))
1890 
1891 static gboolean
header_encode_phrase_merge_words(GList ** wordsp)1892 header_encode_phrase_merge_words (GList **wordsp)
1893 {
1894 	GList *wordl, *nextl, *words = *wordsp;
1895 	struct _phrase_word *word, *next;
1896 	gboolean merged = FALSE;
1897 
1898 	/* scan the list, checking for words of similar types that can be merged */
1899 	wordl = words;
1900 	while (wordl) {
1901 		word = wordl->data;
1902 		nextl = g_list_next (wordl);
1903 
1904 		while (nextl) {
1905 			next = nextl->data;
1906 			/* merge nodes of the same type AND we are not creating too long a string */
1907 			if (word_types_compatable (word->type, next->type)) {
1908 				if (MERGED_WORD_LT_FOLDLEN (next->end - word->start, MAX (word->type, next->type))) {
1909 					/* the resulting word type is the MAX of the 2 types */
1910 					word->type = MAX (word->type, next->type);
1911 					word->encoding = MAX (word->encoding, next->encoding);
1912 					word->end = next->end;
1913 					words = g_list_remove_link (words, nextl);
1914 					g_list_free_1 (nextl);
1915 					g_free (next);
1916 
1917 					nextl = g_list_next (wordl);
1918 
1919 					merged = TRUE;
1920 				} else {
1921 					/* if it is going to be too long, make sure we include the
1922 					 * separating whitespace */
1923 					word->end = next->start;
1924 					break;
1925 				}
1926 			} else {
1927 				break;
1928 			}
1929 		}
1930 
1931 		wordl = g_list_next (wordl);
1932 	}
1933 
1934 	*wordsp = words;
1935 
1936 	return merged;
1937 }
1938 
1939 /* encodes a phrase sequence (different quoting/encoding rules to strings) */
1940 /**
1941  * camel_header_encode_phrase:
1942  * @in: header to encode
1943  *
1944  * Encodes a 'phrase' header according to the rules in rfc2047.
1945  *
1946  * Returns: the encoded 'phrase'
1947  **/
1948 gchar *
camel_header_encode_phrase(const guchar * in)1949 camel_header_encode_phrase (const guchar *in)
1950 {
1951 	struct _phrase_word *word = NULL, *last_word = NULL;
1952 	GList *words, *wordl;
1953 	const gchar *charset;
1954 	GString *out;
1955 
1956 	if (in == NULL)
1957 		return NULL;
1958 
1959 	words = header_encode_phrase_get_words (in);
1960 	if (!words)
1961 		return NULL;
1962 
1963 	while (header_encode_phrase_merge_words (&words))
1964 		;
1965 
1966 	out = g_string_new ("");
1967 
1968 	/* output words now with spaces between them */
1969 	wordl = words;
1970 	while (wordl) {
1971 		const gchar *start;
1972 		gsize len;
1973 
1974 		word = wordl->data;
1975 
1976 		/* append correct number of spaces between words */
1977 		if (last_word && !(last_word->type == WORD_2047 && word->type == WORD_2047)) {
1978 			/* one or both of the words are not encoded so we write the spaces out untouched */
1979 			len = word->start - last_word->end;
1980 			g_string_append_len (out, (gchar *) last_word->end, len);
1981 		}
1982 
1983 		switch (word->type) {
1984 		case WORD_ATOM:
1985 			g_string_append_len (out, (gchar *) word->start, word->end - word->start);
1986 			break;
1987 		case WORD_QSTRING:
1988 			quote_word (out, TRUE, (gchar *) word->start, word->end - word->start);
1989 			break;
1990 		case WORD_2047:
1991 			if (last_word && last_word->type == WORD_2047) {
1992 				/* include the whitespace chars between these 2 words in the
1993 				 * resulting rfc2047 encoded word. */
1994 				len = word->end - last_word->end;
1995 				start = (const gchar *) last_word->end;
1996 
1997 				/* encoded words need to be separated by linear whitespace */
1998 				g_string_append_c (out, ' ');
1999 			} else {
2000 				len = word->end - word->start;
2001 				start = (const gchar *) word->start;
2002 			}
2003 
2004 			if (word->encoding == 1) {
2005 				rfc2047_encode_word (out, start, len, "ISO-8859-1", CAMEL_MIME_IS_PSAFE);
2006 			} else {
2007 				if (!(charset = camel_charset_best (start, len)))
2008 					charset = "UTF-8";
2009 				rfc2047_encode_word (out, start, len, charset, CAMEL_MIME_IS_PSAFE);
2010 			}
2011 			break;
2012 		}
2013 
2014 		g_free (last_word);
2015 		wordl = g_list_next (wordl);
2016 
2017 		last_word = word;
2018 	}
2019 
2020 	/* and we no longer need the list */
2021 	g_free (word);
2022 	g_list_free (words);
2023 
2024 	return g_string_free (out, FALSE);
2025 }
2026 
2027 /* these are all internal parser functions */
2028 
2029 static gchar *
decode_token(const gchar ** in)2030 decode_token (const gchar **in)
2031 {
2032 	const gchar *inptr = *in;
2033 	const gchar *start;
2034 
2035 	header_decode_lwsp (&inptr);
2036 	start = inptr;
2037 	while (camel_mime_is_ttoken (*inptr))
2038 		inptr++;
2039 	if (inptr > start) {
2040 		*in = inptr;
2041 		return g_strndup (start, inptr - start);
2042 	} else {
2043 		return NULL;
2044 	}
2045 }
2046 
2047 /**
2048  * camel_header_token_decode:
2049  * @in: input string
2050  *
2051  * Gets the first token in the string according to the rules of
2052  * rfc0822.
2053  *
2054  * Returns: a new string containing the first token in @in
2055  **/
2056 gchar *
camel_header_token_decode(const gchar * in)2057 camel_header_token_decode (const gchar *in)
2058 {
2059 	if (in == NULL)
2060 		return NULL;
2061 
2062 	return decode_token (&in);
2063 }
2064 
2065 /*
2066  * <"> * ( <any gchar except <"> \, cr  /  \ <any char> ) <">
2067 */
2068 static gchar *
header_decode_quoted_string(const gchar ** in)2069 header_decode_quoted_string (const gchar **in)
2070 {
2071 	const gchar *inptr = *in;
2072 	gchar *out = NULL, *outptr;
2073 	gsize outlen;
2074 	gint c;
2075 
2076 	header_decode_lwsp (&inptr);
2077 	if (*inptr == '"') {
2078 		const gchar *intmp;
2079 		gint skip = 0;
2080 
2081 		/* first, calc length */
2082 		inptr++;
2083 		intmp = inptr;
2084 		while ( (c = *intmp++) && c!= '"') {
2085 			if (c == '\\' && *intmp) {
2086 				intmp++;
2087 				skip++;
2088 			} else if (c == '\n') {
2089 				skip++;
2090 			}
2091 		}
2092 		outlen = intmp - inptr - skip;
2093 		out = outptr = g_malloc (outlen + 1);
2094 		while ( (c = *inptr) && c!= '"') {
2095 			inptr++;
2096 			if (c == '\\' && *inptr) {
2097 				c = *inptr++;
2098 			} else if (c == '\n') {
2099 				continue;
2100 			}
2101 			*outptr++ = c;
2102 		}
2103 		if (c)
2104 			inptr++;
2105 		*outptr = '\0';
2106 	}
2107 	*in = inptr;
2108 	return out;
2109 }
2110 
2111 static gchar *
header_decode_atom(const gchar ** in)2112 header_decode_atom (const gchar **in)
2113 {
2114 	const gchar *inptr = *in, *start;
2115 
2116 	header_decode_lwsp (&inptr);
2117 	start = inptr;
2118 	while (camel_mime_is_atom (*inptr))
2119 		inptr++;
2120 	*in = inptr;
2121 	if (inptr > start)
2122 		return g_strndup (start, inptr - start);
2123 	else
2124 		return NULL;
2125 }
2126 
2127 static gboolean
extract_rfc2047_encoded_word(const gchar ** in,gchar ** word)2128 extract_rfc2047_encoded_word (const gchar **in,
2129                               gchar **word)
2130 {
2131 	const gchar *inptr = *in, *start;
2132 
2133 	header_decode_lwsp (&inptr);
2134 	start = inptr;
2135 
2136 	if (!strncmp (inptr, "=?", 2)) {
2137 		inptr += 2;
2138 
2139 		/* skip past the charset (if one is even declared, sigh) */
2140 		while (*inptr && *inptr != '?') {
2141 			inptr++;
2142 		}
2143 
2144 		/* sanity check encoding type */
2145 		if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
2146 			return FALSE;
2147 
2148 		inptr += 3;
2149 
2150 		/* find the end of the rfc2047 encoded word token */
2151 		while (*inptr && strncmp (inptr, "?=", 2) != 0) {
2152 			inptr++;
2153 		}
2154 
2155 		if (!strncmp (inptr, "?=", 2)) {
2156 			inptr += 2;
2157 
2158 			*in = inptr;
2159 			*word = g_strndup (start, inptr - start);
2160 
2161 			return TRUE;
2162 		}
2163 	}
2164 
2165 	return FALSE;
2166 }
2167 
2168 static gchar *
header_decode_word(const gchar ** in)2169 header_decode_word (const gchar **in)
2170 {
2171 	const gchar *inptr = *in;
2172 	gchar *word = NULL;
2173 
2174 	header_decode_lwsp (&inptr);
2175 	*in = inptr;
2176 
2177 	if (*inptr == '"') {
2178 		return header_decode_quoted_string (in);
2179 	} else if (*inptr == '=' && inptr[1] == '?' && extract_rfc2047_encoded_word (in, &word) && word) {
2180 		return word;
2181 	} else {
2182 		return header_decode_atom (in);
2183 	}
2184 }
2185 
2186 static gchar *
header_decode_value(const gchar ** in)2187 header_decode_value (const gchar **in)
2188 {
2189 	const gchar *inptr = *in;
2190 
2191 	header_decode_lwsp (&inptr);
2192 	if (*inptr == '"') {
2193 		d (printf ("decoding quoted string\n"));
2194 		return header_decode_quoted_string (in);
2195 	} else if (camel_mime_is_ttoken (*inptr)) {
2196 		d (printf ("decoding token\n"));
2197 		/* this may not have the right specials for all params? */
2198 		return decode_token (in);
2199 	}
2200 	return NULL;
2201 }
2202 
2203 /* should this return -1 for no int? */
2204 
2205 /**
2206  * camel_header_decode_int:
2207  * @in: pointer to input string
2208  *
2209  * Extracts an integer token from @in and updates the pointer to point
2210  * to after the end of the integer token (sort of like strtol).
2211  *
2212  * Returns: the gint value
2213  **/
2214 gint
camel_header_decode_int(const gchar ** in)2215 camel_header_decode_int (const gchar **in)
2216 {
2217 	const gchar *inptr = *in;
2218 	gint c, v = 0;
2219 
2220 	header_decode_lwsp (&inptr);
2221 	while ( (c=*inptr++ & 0xff)
2222 		&& isdigit (c) ) {
2223 		v = v * 10 + (c - '0');
2224 	}
2225 	*in = inptr-1;
2226 	return v;
2227 }
2228 
/* Numeric value of the hexadecimal digit @c, in either case.  @c is
 * evaluated more than once and must already be a valid hex digit. */
#define HEXVAL(c) (isdigit (c) ? ((c) - '0') : ((tolower (c) - 'a') + 10))
2230 
2231 static gchar *
hex_decode(const gchar * in,gsize len)2232 hex_decode (const gchar *in,
2233             gsize len)
2234 {
2235 	const guchar *inend = (const guchar *) (in + len);
2236 	guchar *inptr, *outptr;
2237 	gchar *outbuf;
2238 
2239 	outbuf = (gchar *) g_malloc (len + 1);
2240 	outptr = (guchar *) outbuf;
2241 
2242 	inptr = (guchar *) in;
2243 	while (inptr < inend) {
2244 		if (*inptr == '%') {
2245 			if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
2246 				*outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
2247 				inptr += 3;
2248 			} else
2249 				*outptr++ = *inptr++;
2250 		} else
2251 			*outptr++ = *inptr++;
2252 	}
2253 
2254 	*outptr = '\0';
2255 
2256 	return outbuf;
2257 }
2258 
2259 /* Tries to convert @in @from charset @to charset.  Any failure, we get no data out rather than partial conversion */
2260 static gchar *
header_convert(const gchar * to,const gchar * from,const gchar * in,gsize inlen)2261 header_convert (const gchar *to,
2262                 const gchar *from,
2263                 const gchar *in,
2264                 gsize inlen)
2265 {
2266 	GIConv ic;
2267 	gsize outlen, ret;
2268 	gchar *outbuf, *outbase, *result = NULL;
2269 
2270 	ic = camel_iconv_open (to, from);
2271 	if (ic == (GIConv) -1)
2272 		return NULL;
2273 
2274 	outlen = inlen * 6 + 16;
2275 	outbuf = outbase = g_malloc (outlen);
2276 
2277 	ret = camel_iconv (ic, &in, &inlen, &outbuf, &outlen);
2278 	if (ret != (gsize) -1) {
2279 		camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
2280 		*outbuf = '\0';
2281 		result = g_strdup (outbase);
2282 	}
2283 	camel_iconv_close (ic);
2284 	g_free (outbase);
2285 
2286 	return result;
2287 }
2288 
2289 /* an rfc2184 encoded string looks something like:
2290  * us-ascii'en'This%20is%20even%20more%20
2291  */
2292 
2293 static gchar *
rfc2184_decode(const gchar * in,gsize len)2294 rfc2184_decode (const gchar *in,
2295                 gsize len)
2296 {
2297 	const gchar *inptr = in;
2298 	const gchar *inend = in + len;
2299 	const gchar *charset;
2300 	gchar *decoded, *decword, *encoding;
2301 
2302 	inptr = memchr (inptr, '\'', len);
2303 	if (!inptr)
2304 		return NULL;
2305 
2306 	encoding = g_alloca (inptr - in + 1);
2307 	memcpy (encoding, in, inptr - in);
2308 	encoding[inptr - in] = 0;
2309 	charset = camel_iconv_charset_name (encoding);
2310 
2311 	inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
2312 	if (!inptr)
2313 		return NULL;
2314 	inptr++;
2315 	if (inptr >= inend)
2316 		return NULL;
2317 
2318 	decword = hex_decode (inptr, inend - inptr);
2319 	decoded = header_convert ("UTF-8", charset, decword, strlen (decword));
2320 	g_free (decword);
2321 
2322 	return decoded;
2323 }
2324 
2325 /**
2326  * camel_header_param:
2327  * @params: parameters
2328  * @name: name of param to find
2329  *
2330  * Searches @params for a param named @name and gets the value.
2331  *
2332  * Returns: (transfer none): the value of the @name param
2333  **/
2334 gchar *
camel_header_param(struct _camel_header_param * params,const gchar * name)2335 camel_header_param (struct _camel_header_param *params,
2336                     const gchar *name)
2337 {
2338 	while (params && params->name &&
2339 	       g_ascii_strcasecmp (params->name, name) != 0)
2340 		params = params->next;
2341 	if (params)
2342 		return params->value;
2343 
2344 	return NULL;
2345 }
2346 
2347 /**
2348  * camel_header_set_param:
2349  * @paramsp: poinetr to a list of params
2350  * @name: name of param to set
2351  * @value: value to set
2352  *
2353  * Set a parameter in the list.
2354  *
2355  * Returns: (transfer none): the set param
2356  **/
2357 struct _camel_header_param *
camel_header_set_param(struct _camel_header_param ** l,const gchar * name,const gchar * value)2358 camel_header_set_param (struct _camel_header_param **l,
2359                         const gchar *name,
2360                         const gchar *value)
2361 {
2362 	struct _camel_header_param *p = (struct _camel_header_param *) l, *pn;
2363 
2364 	if (name == NULL)
2365 		return NULL;
2366 
2367 	while (p->next) {
2368 		pn = p->next;
2369 		if (!g_ascii_strcasecmp (pn->name, name)) {
2370 			g_free (pn->value);
2371 			if (value) {
2372 				pn->value = g_strdup (value);
2373 				return pn;
2374 			} else {
2375 				p->next = pn->next;
2376 				g_free (pn->name);
2377 				g_free (pn);
2378 				return NULL;
2379 			}
2380 		}
2381 		p = pn;
2382 	}
2383 
2384 	if (value == NULL)
2385 		return NULL;
2386 
2387 	pn = g_malloc (sizeof (*pn));
2388 	pn->next = NULL;
2389 	pn->name = g_strdup (name);
2390 	pn->value = g_strdup (value);
2391 	p->next = pn;
2392 
2393 	return pn;
2394 }
2395 
2396 /**
2397  * camel_content_type_param:
2398  * @content_type: a #CamelContentType
2399  * @name: name of param to find
2400  *
2401  * Searches the params on s #CamelContentType for a param named @name
2402  * and gets the value.
2403  *
2404  * Returns: the value of the @name param
2405  **/
2406 const gchar *
camel_content_type_param(CamelContentType * t,const gchar * name)2407 camel_content_type_param (CamelContentType *t,
2408                           const gchar *name)
2409 {
2410 	if (t == NULL)
2411 		return NULL;
2412 	return camel_header_param (t->params, name);
2413 }
2414 
/**
 * camel_content_type_set_param:
 * @content_type: a #CamelContentType
 * @name: name of param to set
 * @value: value of param to set
 *
 * Set a parameter on @content_type. This forwards to
 * camel_header_set_param() on the content type's param list, so a
 * %NULL @value removes an existing param of that name. Does nothing
 * (apart from the g_return_if_fail() check) when @content_type is %NULL.
 **/
void
camel_content_type_set_param (CamelContentType *t,
                              const gchar *name,
                              const gchar *value)
{
	g_return_if_fail (t != NULL);

	/* the returned param is not needed here */
	camel_header_set_param (&t->params, name, value);
}
2432 
2433 /**
2434  * camel_content_type_is:
2435  * @content_type: A content type specifier, or %NULL.
2436  * @type: A type to check against.
2437  * @subtype: A subtype to check against, or "*" to match any subtype.
2438  *
2439  * The subtype of "*" will match any subtype.  If @ct is %NULL, then
2440  * it will match the type "text/plain".
2441  *
2442  * Returns: %TRUE if the content type @ct is of type @type/@subtype or
2443  * %FALSE otherwise
2444  **/
2445 gboolean
camel_content_type_is(const CamelContentType * ct,const gchar * type,const gchar * subtype)2446 camel_content_type_is (const CamelContentType *ct,
2447                        const gchar *type,
2448                        const gchar *subtype)
2449 {
2450 	/* no type == text/plain or text/"*" */
2451 	if (ct == NULL || (ct->type == NULL && ct->subtype == NULL)) {
2452 		return (!g_ascii_strcasecmp (type, "text")
2453 			&& (!g_ascii_strcasecmp (subtype, "plain")
2454 			|| !strcmp (subtype, "*")));
2455 	}
2456 
2457 	return (ct->type != NULL
2458 		&& (!g_ascii_strcasecmp (ct->type, type)
2459 		&& ((ct->subtype != NULL
2460 		&& !g_ascii_strcasecmp (ct->subtype, subtype))
2461 			|| !strcmp ("*", subtype))));
2462 }
2463 
2464 /**
2465  * camel_header_param_list_free:
2466  * @params: a list of params
2467  *
2468  * Free the list of params.
2469  **/
2470 void
camel_header_param_list_free(struct _camel_header_param * p)2471 camel_header_param_list_free (struct _camel_header_param *p)
2472 {
2473 	struct _camel_header_param *n;
2474 
2475 	while (p) {
2476 		n = p->next;
2477 		g_free (p->name);
2478 		g_free (p->value);
2479 		g_free (p);
2480 		p = n;
2481 	}
2482 }
2483 
2484 /**
2485  * camel_content_type_new:
2486  * @type: the major type of the new content-type
2487  * @subtype: the subtype
2488  *
2489  * Create a new #CamelContentType.
2490  *
2491  * Returns: the new #CamelContentType
2492  **/
2493 CamelContentType *
camel_content_type_new(const gchar * type,const gchar * subtype)2494 camel_content_type_new (const gchar *type,
2495                         const gchar *subtype)
2496 {
2497 	CamelContentType *t;
2498 
2499 	t = g_slice_new (CamelContentType);
2500 	t->type = g_strdup (type);
2501 	t->subtype = g_strdup (subtype);
2502 	t->params = NULL;
2503 	t->refcount = 1;
2504 
2505 	return t;
2506 }
2507 
2508 /**
2509  * camel_content_type_ref:
2510  * @content_type: a #CamelContentType
2511  *
2512  * Refs the content type.
2513  **/
2514 CamelContentType *
camel_content_type_ref(CamelContentType * ct)2515 camel_content_type_ref (CamelContentType *ct)
2516 {
2517 	if (ct)
2518 		ct->refcount++;
2519 
2520 	return ct;
2521 }
2522 
2523 /**
2524  * camel_content_type_unref:
2525  * @content_type: a #CamelContentType
2526  *
2527  * Unrefs, and potentially frees, the content type.
2528  **/
2529 void
camel_content_type_unref(CamelContentType * ct)2530 camel_content_type_unref (CamelContentType *ct)
2531 {
2532 	if (ct) {
2533 		if (ct->refcount <= 1) {
2534 			camel_header_param_list_free (ct->params);
2535 			g_free (ct->type);
2536 			g_free (ct->subtype);
2537 			g_slice_free (CamelContentType, ct);
2538 			ct = NULL;
2539 		} else {
2540 			ct->refcount--;
2541 		}
2542 	}
2543 }
2544 
2545 /* for decoding email addresses, canonically */
2546 static gchar *
header_decode_domain(const gchar ** in)2547 header_decode_domain (const gchar **in)
2548 {
2549 	const gchar *inptr = *in;
2550 	gint go = TRUE;
2551 	GString *domain = g_string_new ("");
2552 
2553 	/* domain ref | domain literal */
2554 	header_decode_lwsp (&inptr);
2555 	while (go) {
2556 		if (*inptr == '[') { /* domain literal */
2557 			g_string_append_c (domain, '[');
2558 			inptr++;
2559 			header_decode_lwsp (&inptr);
2560 			while (*inptr && camel_mime_is_dtext (*inptr)) {
2561 				g_string_append_c (domain, *inptr);
2562 				inptr++;
2563 			}
2564 			if (*inptr == ']') {
2565 				g_string_append_c (domain, ']');
2566 				inptr++;
2567 			} else {
2568 				w (g_warning ("closing ']' not found in domain: %s", *in));
2569 			}
2570 		} else {
2571 			gchar *a = header_decode_atom (&inptr);
2572 			if (a) {
2573 				g_string_append (domain, a);
2574 				g_free (a);
2575 			} else {
2576 				w (g_warning ("missing atom from domain-ref"));
2577 				break;
2578 			}
2579 		}
2580 		header_decode_lwsp (&inptr);
2581 		if (*inptr == '.') { /* next sub-domain? */
2582 			g_string_append_c (domain, '.');
2583 			inptr++;
2584 			header_decode_lwsp (&inptr);
2585 		} else
2586 			go = FALSE;
2587 	}
2588 
2589 	*in = inptr;
2590 
2591 	return g_string_free (domain, FALSE);
2592 }
2593 
2594 static gchar *
header_decode_addrspec(const gchar ** in)2595 header_decode_addrspec (const gchar **in)
2596 {
2597 	const gchar *inptr = *in;
2598 	gchar *word;
2599 	GString *addr = g_string_new ("");
2600 
2601 	header_decode_lwsp (&inptr);
2602 
2603 	/* addr-spec */
2604 	word = header_decode_word (&inptr);
2605 	if (word) {
2606 		g_string_append (addr, word);
2607 		header_decode_lwsp (&inptr);
2608 		g_free (word);
2609 		while (*inptr == '.' && word) {
2610 			inptr++;
2611 			g_string_append_c (addr, '.');
2612 			word = header_decode_word (&inptr);
2613 			if (word) {
2614 				g_string_append (addr, word);
2615 				header_decode_lwsp (&inptr);
2616 				g_free (word);
2617 			} else {
2618 				w (g_warning ("Invalid address spec: %s", *in));
2619 			}
2620 		}
2621 		if (*inptr == '@') {
2622 			inptr++;
2623 			g_string_append_c (addr, '@');
2624 			word = header_decode_domain (&inptr);
2625 			if (word) {
2626 				g_string_append (addr, word);
2627 				g_free (word);
2628 			} else {
2629 				w (g_warning ("Invalid address, missing domain: %s", *in));
2630 			}
2631 		} else {
2632 			w (g_warning ("Invalid addr-spec, missing @: %s", *in));
2633 		}
2634 	} else {
2635 		w (g_warning ("invalid addr-spec, no local part"));
2636 		g_string_free (addr, TRUE);
2637 
2638 		return NULL;
2639 	}
2640 
2641 	/* FIXME: return null on error? */
2642 
2643 	*in = inptr;
2644 	return g_string_free (addr, FALSE);
2645 }
2646 
2647 /*
2648  * address:
2649  * word *('.' word) @ domain |
2650  * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain |
2651  *
2652  * 1 * word ':'[ word ... etc (mailbox, as above) ] ';'
2653  */
2654 
2655 /* mailbox:
2656  * word *( '.' word ) '@' domain
2657  * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain
2658  * */
2659 
2660 static CamelHeaderAddress *
header_decode_mailbox(const gchar ** in,const gchar * charset)2661 header_decode_mailbox (const gchar **in,
2662                        const gchar *charset)
2663 {
2664 	const gchar *inptr = *in;
2665 	gchar *pre;
2666 	gint closeme = FALSE;
2667 	GString *addr;
2668 	GString *name = NULL;
2669 	CamelHeaderAddress *address = NULL;
2670 	const gchar *comment = NULL;
2671 
2672 	addr = g_string_new ("");
2673 
2674  start:
2675 	/* for each address */
2676 	pre = header_decode_word (&inptr);
2677 	header_decode_lwsp (&inptr);
2678 	if (!(*inptr == '.' || *inptr == '@' || *inptr == ',' || *inptr == '\0')) {
2679 		/* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */
2680 		if (!name)
2681 			name = g_string_new ("");
2682 		while (pre) {
2683 			gchar *text, *last;
2684 
2685 			/* perform internationalised decoding, and append */
2686 			text = header_decode_text (pre, FALSE, charset);
2687 			g_string_append (name, text);
2688 			last = pre;
2689 			g_free (text);
2690 
2691 			pre = header_decode_word (&inptr);
2692 			if (pre) {
2693 				gsize l = strlen (last);
2694 				gsize p = strlen (pre);
2695 
2696 				/* dont append ' ' between sucsessive encoded words */
2697 				if ((l > 6 && last[l - 2] == '?' && last[l - 1] == '=')
2698 				    && (p > 6 && pre[0] == '=' && pre[1] == '?')) {
2699 					/* dont append ' ' */
2700 				} else {
2701 					g_string_append_c (name, ' ');
2702 				}
2703 			} else {
2704 				/* Fix for stupidly-broken-mailers that like to put '.''s in names unquoted */
2705 				/* see bug #8147 */
2706 				while (!pre && *inptr && *inptr != '<') {
2707 					w (g_warning ("Working around stupid mailer bug #5: unescaped characters in names"));
2708 					g_string_append_c (name, *inptr++);
2709 					pre = header_decode_word (&inptr);
2710 				}
2711 			}
2712 			g_free (last);
2713 		}
2714 		header_decode_lwsp (&inptr);
2715 		if (*inptr == '<') {
2716 			closeme = TRUE;
2717 		try_address_again:
2718 			inptr++;
2719 			header_decode_lwsp (&inptr);
2720 			if (*inptr == '@') {
2721 				while (*inptr == '@') {
2722 					inptr++;
2723 					header_decode_domain (&inptr);
2724 					header_decode_lwsp (&inptr);
2725 					if (*inptr == ',') {
2726 						inptr++;
2727 						header_decode_lwsp (&inptr);
2728 					}
2729 				}
2730 				if (*inptr == ':') {
2731 					inptr++;
2732 				} else {
2733 					w (g_warning ("broken route-address, missing ':': %s", *in));
2734 				}
2735 			}
2736 			pre = header_decode_word (&inptr);
2737 			/*header_decode_lwsp(&inptr);*/
2738 		} else {
2739 			w (g_warning ("broken address? %s", *in));
2740 		}
2741 	}
2742 
2743 	if (pre) {
2744 		g_string_append (addr, pre);
2745 	} else {
2746 		w (g_warning ("No local-part for email address: %s", *in));
2747 	}
2748 
2749 	/* should be at word '.' localpart */
2750 	while (*inptr == '.' && pre) {
2751 		inptr++;
2752 		g_free (pre);
2753 		pre = header_decode_word (&inptr);
2754 		g_string_append_c (addr, '.');
2755 		if (pre)
2756 			g_string_append (addr, pre);
2757 		comment = inptr;
2758 		header_decode_lwsp (&inptr);
2759 	}
2760 	g_free (pre);
2761 
2762 	/* now at '@' domain part */
2763 	if (*inptr == '@') {
2764 		gchar *dom;
2765 
2766 		inptr++;
2767 		g_string_append_c (addr, '@');
2768 		comment = inptr;
2769 		dom = header_decode_domain (&inptr);
2770 		g_string_append (addr, dom);
2771 		g_free (dom);
2772 	} else if (*inptr != '>' || !closeme) {
2773 		/* If we get a <, the address was probably a name part, lets try again shall we? */
2774 		/* Another fix for seriously-broken-mailers */
2775 		if (*inptr && *inptr != ',') {
2776 			gchar *text;
2777 			const gchar *name_part;
2778 			gboolean in_quote;
2779 
2780 			w (g_warning ("We didn't get an '@' where we expected in '%s', trying again", *in));
2781 			w (g_warning ("Name is '%s', Addr is '%s' we're at '%s'\n", name ? name->str:"<UNSET>", addr->str, inptr));
2782 
2783 			/* need to keep *inptr, as try_address_again will drop the current character */
2784 			if (*inptr == '<')
2785 				closeme = TRUE;
2786 			else
2787 				g_string_append_c (addr, *inptr);
2788 
2789 			name_part = *in;
2790 			in_quote = FALSE;
2791 			while (*name_part && *name_part != ',') {
2792 				if (*name_part == '\"')
2793 					in_quote = !in_quote;
2794 				else if (!in_quote && *name_part == '<')
2795 					break;
2796 				name_part++;
2797 			}
2798 
2799 			if (*name_part == '<' && ((!strchr (name_part, ',') && strchr (name_part, '>')) || (strchr (name_part, ',') > strchr (name_part, '>')))) {
2800 				/* it's of a form "display-name <addr-spec>" */
2801 				if (name)
2802 					g_string_free (name, TRUE);
2803 				name = NULL;
2804 				g_string_free (addr, TRUE);
2805 
2806 				if (name_part == *in)
2807 					addr = g_string_new ("");
2808 				else
2809 					addr = g_string_new_len (*in, name_part - *in - (camel_mime_is_lwsp (name_part[-1]) ? 1 : 0));
2810 			}
2811 
2812 			/* check for address is encoded word ... */
2813 			text = header_decode_text (addr->str, FALSE, charset);
2814 			if (name == NULL) {
2815 				name = addr;
2816 				addr = g_string_new ("");
2817 				if (text) {
2818 					g_string_truncate (name, 0);
2819 					g_string_append (name, text);
2820 				}
2821 			}/* else {
2822 				g_string_append (name, text ? text : addr->str);
2823 				g_string_truncate (addr, 0);
2824 			}*/
2825 			g_free (text);
2826 
2827 			/* or maybe that we've added up a bunch of broken bits to make an encoded word */
2828 			if ((text = rfc2047_decode_word (name->str, name->len, charset))) {
2829 				g_string_truncate (name, 0);
2830 				g_string_append (name, text);
2831 				g_free (text);
2832 			}
2833 
2834 			goto try_address_again;
2835 		}
2836 		w (g_warning ("invalid address, no '@' domain part at %c: %s", *inptr, *in));
2837 	}
2838 
2839 	if (closeme) {
2840 		header_decode_lwsp (&inptr);
2841 		if (*inptr == '>') {
2842 			inptr++;
2843 		} else {
2844 			w (g_warning ("invalid route address, no closing '>': %s", *in));
2845 		}
2846 	} else if (name == NULL && comment != NULL && inptr>comment) { /* check for comment after address */
2847 		gchar *text, *tmp;
2848 		const gchar *comstart, *comend;
2849 
2850 		/* this is a bit messy, we go from the last known position, because
2851 		 * decode_domain/etc skip over any comments on the way */
2852 		/* FIXME: This wont detect comments inside the domain itself,
2853 		 * but nobody seems to use that feature anyway ... */
2854 
2855 		d (printf ("checking for comment from '%s'\n", comment));
2856 
2857 		comstart = strchr (comment, '(');
2858 		if (comstart) {
2859 			comstart++;
2860 			header_decode_lwsp (&inptr);
2861 			comend = inptr - 1;
2862 			while (comend > comstart && comend[0] != ')')
2863 				comend--;
2864 
2865 			if (comend > comstart) {
2866 				d (printf ("  looking at subset '%.*s'\n", comend - comstart, comstart));
2867 				tmp = g_strndup (comstart, comend - comstart);
2868 				text = header_decode_text (tmp, FALSE, charset);
2869 				name = g_string_new (text);
2870 				g_free (tmp);
2871 				g_free (text);
2872 			}
2873 		}
2874 	}
2875 
2876 	header_decode_lwsp (&inptr);
2877 
2878 	if (*inptr && *inptr != ',') {
2879 		if (addr->len > 0) {
2880 			if (!name) {
2881 				name = g_string_sized_new (addr->len + 5);
2882 			} else {
2883 				g_string_append_c (name, ' ');
2884 			}
2885 
2886 			g_string_append_c (name, '<');
2887 			g_string_append (name, addr->str);
2888 			g_string_append_c (name, '>');
2889 			g_string_append_c (name, ' ');
2890 
2891 			g_string_truncate (addr, 0);
2892 		}
2893 
2894 		goto start;
2895 	}
2896 
2897 	*in = inptr;
2898 
2899 	if (name) {
2900 		/* Trim any trailing spaces */
2901 		while (name->len > 0 && name->str[name->len - 1] == ' ') {
2902 			g_string_truncate (name, name->len - 1);
2903 		}
2904 	}
2905 
2906 	if (addr->len > 0) {
2907 		if (!g_utf8_validate (addr->str, addr->len, NULL)) {
2908 			/* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */
2909 			const gchar *locale_charset;
2910 			GString *out;
2911 
2912 			locale_charset = camel_iconv_locale_charset ();
2913 
2914 			out = g_string_new ("");
2915 
2916 			if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset))
2917 			    && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset)))
2918 				append_latin1 (out, addr->str, addr->len);
2919 
2920 			g_string_free (addr, TRUE);
2921 			addr = out;
2922 		}
2923 
2924 		if (!name) {
2925 			gchar *text;
2926 
2927 			text = rfc2047_decode_word (addr->str, addr->len, charset);
2928 			if (text) {
2929 				g_string_truncate (addr, 0);
2930 				g_string_append (addr, text);
2931 				g_free (text);
2932 
2933 				make_string_utf8_valid (addr->str, addr->len);
2934 			}
2935 
2936 		} else {
2937 			make_string_utf8_valid (name->str, name->len);
2938 		}
2939 
2940 		address = camel_header_address_new_name (name ? name->str : "", addr->str);
2941 	} else if (name) {
2942 		/* A name-only address, might be something wrong, but include it anyway */
2943 		make_string_utf8_valid (name->str, name->len);
2944 		address = camel_header_address_new_name (name->str, "");
2945 	}
2946 
2947 	d (printf ("got mailbox: %s\n", addr->str));
2948 
2949 	g_string_free (addr, TRUE);
2950 	if (name)
2951 		g_string_free (name, TRUE);
2952 
2953 	return address;
2954 }
2955 
2956 static CamelHeaderAddress *
header_decode_address(const gchar ** in,const gchar * charset)2957 header_decode_address (const gchar **in,
2958                        const gchar *charset)
2959 {
2960 	const gchar *inptr = *in;
2961 	gchar *pre;
2962 	GString *group = g_string_new ("");
2963 	CamelHeaderAddress *addr = NULL, *member;
2964 
2965 	/* pre-scan, trying to work out format, discard results */
2966 	header_decode_lwsp (&inptr);
2967 	while ((pre = header_decode_word (&inptr))) {
2968 		g_string_append (group, pre);
2969 		g_string_append_c (group, ' ');
2970 		g_free (pre);
2971 	}
2972 	header_decode_lwsp (&inptr);
2973 	if (*inptr == ':') {
2974 		d (printf ("group detected: %s\n", group->str));
2975 		addr = camel_header_address_new_group (group->str);
2976 		/* that was a group spec, scan mailbox's */
2977 		inptr++;
2978 		/* FIXME: check rfc 2047 encodings of words, here or above in the loop */
2979 		header_decode_lwsp (&inptr);
2980 		if (*inptr != ';') {
2981 			gint go = TRUE;
2982 			do {
2983 				member = header_decode_mailbox (&inptr, charset);
2984 				if (member)
2985 					camel_header_address_add_member (addr, member);
2986 				header_decode_lwsp (&inptr);
2987 				if (*inptr == ',')
2988 					inptr++;
2989 				else
2990 					go = FALSE;
2991 			} while (go);
2992 			if (*inptr == ';') {
2993 				inptr++;
2994 			} else {
2995 				w (g_warning ("Invalid group spec, missing closing ';': %s", *in));
2996 			}
2997 		} else {
2998 			inptr++;
2999 		}
3000 		*in = inptr;
3001 	} else {
3002 		addr = header_decode_mailbox (in, charset);
3003 	}
3004 
3005 	g_string_free (group, TRUE);
3006 
3007 	return addr;
3008 }
3009 
3010 static gchar *
header_msgid_decode_internal(const gchar ** in)3011 header_msgid_decode_internal (const gchar **in)
3012 {
3013 	const gchar *inptr = *in;
3014 	gchar *msgid = NULL;
3015 
3016 	d (printf ("decoding Message-ID: '%s'\n", *in));
3017 
3018 	header_decode_lwsp (&inptr);
3019 	if (*inptr == '<') {
3020 		inptr++;
3021 		header_decode_lwsp (&inptr);
3022 		msgid = header_decode_addrspec (&inptr);
3023 		if (msgid) {
3024 			header_decode_lwsp (&inptr);
3025 			if (*inptr == '>') {
3026 				inptr++;
3027 			} else {
3028 				w (g_warning ("Missing closing '>' on message id: %s", *in));
3029 			}
3030 		} else {
3031 			w (g_warning ("Cannot find message id in: %s", *in));
3032 		}
3033 	} else {
3034 		w (g_warning ("missing opening '<' on message id: %s", *in));
3035 	}
3036 	*in = inptr;
3037 
3038 	return msgid;
3039 }
3040 
3041 /**
3042  * camel_header_msgid_decode:
3043  * @in: input string
3044  *
3045  * Extract a message-id token from @in.
3046  *
3047  * Returns: the msg-id
3048  **/
3049 gchar *
camel_header_msgid_decode(const gchar * in)3050 camel_header_msgid_decode (const gchar *in)
3051 {
3052 	if (in == NULL)
3053 		return NULL;
3054 
3055 	return header_msgid_decode_internal (&in);
3056 }
3057 
3058 /**
3059  * camel_header_contentid_decode:
3060  * @in: input string
3061  *
3062  * Extract a content-id from @in.
3063  *
3064  * Returns: the extracted content-id
3065  **/
3066 gchar *
camel_header_contentid_decode(const gchar * in)3067 camel_header_contentid_decode (const gchar *in)
3068 {
3069 	const gchar *inptr = in;
3070 	gboolean at = FALSE;
3071 	GString *addr;
3072 	gchar *buf;
3073 
3074 	d (printf ("decoding Content-ID: '%s'\n", in));
3075 
3076 	header_decode_lwsp (&inptr);
3077 
3078 	/* some lame mailers quote the Content-Id */
3079 	if (*inptr == '"')
3080 		inptr++;
3081 
3082 	/* make sure the content-id is not "" which can happen if we get a
3083 	 * content-id such as <.@> (which Eudora likes to use...) */
3084 	if ((buf = camel_header_msgid_decode (inptr)) != NULL && *buf)
3085 		return buf;
3086 
3087 	g_free (buf);
3088 
3089 	/* ugh, not a valid msg-id - try to get something useful out of it then? */
3090 	inptr = in;
3091 	header_decode_lwsp (&inptr);
3092 	if (*inptr == '<') {
3093 		inptr++;
3094 		header_decode_lwsp (&inptr);
3095 	}
3096 
3097 	/* Eudora has been known to use <.@> as a content-id */
3098 	if (!(buf = header_decode_word (&inptr)) && (*inptr == '\0' || !strchr (".@", *inptr)))
3099 		return NULL;
3100 
3101 	addr = g_string_new ("");
3102 	header_decode_lwsp (&inptr);
3103 	while (buf != NULL || *inptr == '.' || (*inptr == '@' && !at)) {
3104 		if (buf != NULL) {
3105 			g_string_append (addr, buf);
3106 			g_free (buf);
3107 			buf = NULL;
3108 		}
3109 
3110 		if (!at) {
3111 			if (*inptr == '.') {
3112 				g_string_append_c (addr, *inptr++);
3113 				buf = header_decode_word (&inptr);
3114 			} else if (*inptr == '@') {
3115 				g_string_append_c (addr, *inptr++);
3116 				buf = header_decode_word (&inptr);
3117 				at = TRUE;
3118 			}
3119 		} else if (*inptr != '\0' && strchr (".[]", *inptr)) {
3120 			g_string_append_c (addr, *inptr++);
3121 			buf = header_decode_atom (&inptr);
3122 		}
3123 
3124 		header_decode_lwsp (&inptr);
3125 	}
3126 
3127 	return g_string_free (addr, FALSE);
3128 }
3129 
3130 static void
header_references_decode_single(const gchar ** in,GSList ** list)3131 header_references_decode_single (const gchar **in, GSList **list)
3132 {
3133 	const gchar *inptr = *in;
3134 	GString *accum_word = NULL;
3135 	gchar *id, *word;
3136 
3137 	while (*inptr) {
3138 		header_decode_lwsp (&inptr);
3139 		if (*inptr == '<') {
3140 			id = header_msgid_decode_internal (&inptr);
3141 			if (id) {
3142 				*list = g_slist_prepend (*list, id);
3143 				break;
3144 			}
3145 		} else {
3146 			word = header_decode_word (&inptr);
3147 			if (word) {
3148 				/* To support broken clients, which do not enclose message IDs into angle brackets, as
3149 				   required in the RFC 2822: https://tools.ietf.org/html/rfc2822#section-3.6.4 */
3150 				if (!*inptr || camel_mime_is_lwsp (*inptr)) {
3151 					if (accum_word) {
3152 						g_string_append (accum_word, word);
3153 						*list = g_slist_prepend (*list, g_string_free (accum_word, FALSE));
3154 						accum_word = NULL;
3155 					} else {
3156 						*list = g_slist_prepend (*list, word);
3157 						word = NULL;
3158 					}
3159 				} else {
3160 					if (accum_word)
3161 						g_string_append (accum_word, word);
3162 					else
3163 						accum_word = g_string_new (word);
3164 
3165 					g_string_append_c (accum_word, *inptr);
3166 				}
3167 				g_free (word);
3168 			} else if (*inptr != '\0')
3169 				inptr++; /* Stupid mailer tricks */
3170 		}
3171 	}
3172 
3173 	if (accum_word)
3174 		*list = g_slist_prepend (*list, g_string_free (accum_word, FALSE));
3175 
3176 	*in = inptr;
3177 }
3178 
3179 /**
3180  * camel_header_references_decode:
3181  * @in: References header value
3182  *
3183  * Generate a list of references, from most recent up.
3184  *
3185  * Returns: (element-type utf8) (transfer full): a list of references decoedd from @in
3186  **/
3187 GSList *
camel_header_references_decode(const gchar * in)3188 camel_header_references_decode (const gchar *in)
3189 {
3190 	GSList *refs = NULL;
3191 
3192 	if (in == NULL || in[0] == '\0')
3193 		return NULL;
3194 
3195 	while (*in)
3196 		header_references_decode_single (&in, &refs);
3197 
3198 	return refs;
3199 }
3200 
3201 CamelHeaderAddress *
camel_header_mailbox_decode(const gchar * in,const gchar * charset)3202 camel_header_mailbox_decode (const gchar *in,
3203                              const gchar *charset)
3204 {
3205 	if (in == NULL)
3206 		return NULL;
3207 
3208 	return header_decode_mailbox (&in, charset);
3209 }
3210 
3211 CamelHeaderAddress *
camel_header_address_decode(const gchar * in,const gchar * charset)3212 camel_header_address_decode (const gchar *in,
3213                              const gchar *charset)
3214 {
3215 	const gchar *inptr = in, *last;
3216 	CamelHeaderAddress *list = NULL, *addr;
3217 
3218 	d (printf ("decoding To: '%s'\n", in));
3219 
3220 	if (in == NULL)
3221 		return NULL;
3222 
3223 	header_decode_lwsp (&inptr);
3224 	if (*inptr == 0)
3225 		return NULL;
3226 
3227 	do {
3228 		last = inptr;
3229 		addr = header_decode_address (&inptr, charset);
3230 		if (addr)
3231 			camel_header_address_list_append (&list, addr);
3232 		header_decode_lwsp (&inptr);
3233 		if (*inptr == ',')
3234 			inptr++;
3235 		else
3236 			break;
3237 	} while (inptr != last);
3238 
3239 	if (*inptr) {
3240 		w (g_warning ("Invalid input detected at %c (%d): '%s'\n or at: '%s'", *inptr, (gint) (inptr - in), in, inptr));
3241 	}
3242 
3243 	if (inptr == last) {
3244 		w (g_warning ("detected invalid input loop at : '%s' for '%s'", last, in));
3245 	}
3246 
3247 	return list;
3248 }
3249 
3250 /**
3251  * camel_header_newsgroups_decode:
3252  * @in:
3253  *
3254  * Returns: (element-type utf8) (transfer full):
3255  **/
3256 GSList *
camel_header_newsgroups_decode(const gchar * in)3257 camel_header_newsgroups_decode (const gchar *in)
3258 {
3259 	const gchar *inptr = in;
3260 	register gchar c;
3261 	GSList *list = NULL;
3262 	const gchar *start;
3263 
3264 	do {
3265 		header_decode_lwsp (&inptr);
3266 		start = inptr;
3267 		while ((c = *inptr++) && !camel_mime_is_lwsp (c) && c != ',')
3268 			;
3269 		if (start != inptr - 1) {
3270 			list = g_slist_prepend (list, g_strndup (start, inptr - start - 1));
3271 		}
3272 	} while (c);
3273 
3274 	return list;
3275 }
3276 
/* this must be kept in sync with the header */
/* Transfer-encoding names, looked up by index in
 * camel_transfer_encoding_to_string() and searched by
 * camel_transfer_encoding_from_string().  Entry 0 is the empty string. */
static const gchar *encodings[] = {
	"",
	"7bit",
	"8bit",
	"base64",
	"quoted-printable",
	"binary",
	"x-uuencode",
};
3287 
3288 const gchar *
camel_transfer_encoding_to_string(CamelTransferEncoding encoding)3289 camel_transfer_encoding_to_string (CamelTransferEncoding encoding)
3290 {
3291 	if (encoding >= G_N_ELEMENTS (encodings))
3292 		encoding = 0;
3293 
3294 	return encodings[encoding];
3295 }
3296 
3297 CamelTransferEncoding
camel_transfer_encoding_from_string(const gchar * string)3298 camel_transfer_encoding_from_string (const gchar *string)
3299 {
3300 	gint i;
3301 
3302 	if (string != NULL) {
3303 		for (i = 0; i < G_N_ELEMENTS (encodings); i++)
3304 			if (!g_ascii_strcasecmp (string, encodings[i]))
3305 				return i;
3306 
3307 		if (!g_ascii_strcasecmp (string, "uuencode"))
3308 			return CAMEL_TRANSFER_ENCODING_UUENCODE;
3309 	}
3310 
3311 	return CAMEL_TRANSFER_ENCODING_DEFAULT;
3312 }
3313 
3314 void
camel_header_mime_decode(const gchar * in,gint * maj,gint * min)3315 camel_header_mime_decode (const gchar *in,
3316                           gint *maj,
3317                           gint *min)
3318 {
3319 	const gchar *inptr = in;
3320 	gint major=-1, minor=-1;
3321 
3322 	d (printf ("decoding MIME-Version: '%s'\n", in));
3323 
3324 	if (in != NULL) {
3325 		header_decode_lwsp (&inptr);
3326 		if (isdigit (*inptr)) {
3327 			major = camel_header_decode_int (&inptr);
3328 			header_decode_lwsp (&inptr);
3329 			if (*inptr == '.') {
3330 				inptr++;
3331 				header_decode_lwsp (&inptr);
3332 				if (isdigit (*inptr))
3333 					minor = camel_header_decode_int (&inptr);
3334 			}
3335 		}
3336 	}
3337 
3338 	if (maj)
3339 		*maj = major;
3340 	if (min)
3341 		*min = minor;
3342 
3343 	d (printf ("major = %d, minor = %d\n", major, minor));
3344 }
3345 
/* One fragment of a split ("name*N=value") parameter, collected by
 * header_decode_param_list() until all fragments can be sorted and
 * rejoined. */
struct _rfc2184_param {
	struct _camel_header_param param;	/* name (without the "*N" suffix) and fragment value */
	gint index;				/* the N of "name*N", used as the secondary sort key */
};
3350 
3351 static gint
rfc2184_param_cmp(gconstpointer ap,gconstpointer bp)3352 rfc2184_param_cmp (gconstpointer ap,
3353                    gconstpointer bp)
3354 {
3355 	const struct _rfc2184_param *a = *(gpointer *) ap;
3356 	const struct _rfc2184_param *b = *(gpointer *) bp;
3357 	gint res;
3358 
3359 	res = strcmp (a->param.name, b->param.name);
3360 	if (res == 0) {
3361 		if (a->index > b->index)
3362 			res = 1;
3363 		else if (a->index < b->index)
3364 			res = -1;
3365 	}
3366 
3367 	return res;
3368 }
3369 
3370 /* NB: Steals name and value */
3371 static struct _camel_header_param *
header_append_param(struct _camel_header_param * last,gchar * name,gchar * value)3372 header_append_param (struct _camel_header_param *last,
3373                      gchar *name,
3374                      gchar *value)
3375 {
3376 	struct _camel_header_param *node;
3377 
3378 	/* This handles -
3379 	 *  8 bit data in parameters, illegal, tries to convert using locale, or just safens it up.
3380 	 *  rfc2047 ecoded parameters, illegal, decodes them anyway.  Some Outlook & Mozilla do this?
3381 	*/
3382 	node = g_malloc (sizeof (*node));
3383 	last->next = node;
3384 	node->next = NULL;
3385 	node->name = name;
3386 	if (strncmp (value, "=?", 2) == 0
3387 	    && (node->value = header_decode_text (value, FALSE, NULL))) {
3388 		g_free (value);
3389 	} else if (g_ascii_strcasecmp (name, "boundary") != 0 && !g_utf8_validate (value, -1, NULL)) {
3390 		const gchar *charset = camel_iconv_locale_charset ();
3391 
3392 		if ((node->value = header_convert ("UTF-8", charset ? charset:"ISO-8859-1", value, strlen (value)))) {
3393 			g_free (value);
3394 		} else {
3395 			node->value = value;
3396 			for (;*value; value++)
3397 				if (!isascii ((guchar) * value))
3398 					*value = '_';
3399 		}
3400 	} else
3401 		node->value = value;
3402 
3403 	return node;
3404 }
3405 
3406 static struct _camel_header_param *
header_decode_param_list(const gchar ** in)3407 header_decode_param_list (const gchar **in)
3408 {
3409 	struct _camel_header_param *head = NULL, *last = (struct _camel_header_param *) &head;
3410 	GPtrArray *split = NULL;
3411 	const gchar *inptr = *in;
3412 	struct _rfc2184_param *work;
3413 	gchar *tmp;
3414 
3415 	/* Dump parameters into the output list, in the order found.  RFC 2184 split parameters are kept in an array */
3416 	header_decode_lwsp (&inptr);
3417 	while (*inptr == ';') {
3418 		gchar *name;
3419 		gchar *value = NULL;
3420 
3421 		inptr++;
3422 		name = decode_token (&inptr);
3423 		header_decode_lwsp (&inptr);
3424 		if (*inptr == '=') {
3425 			inptr++;
3426 			value = header_decode_value (&inptr);
3427 		}
3428 
3429 		if (name && value) {
3430 			gchar *index = strchr (name, '*');
3431 
3432 			if (index) {
3433 				if (index[1] == 0) {
3434 					/* VAL*="foo", decode immediately and append */
3435 					*index = 0;
3436 					tmp = rfc2184_decode (value, strlen (value));
3437 					if (tmp) {
3438 						g_free (value);
3439 						value = tmp;
3440 					}
3441 					last = header_append_param (last, name, value);
3442 				} else {
3443 					/* VAL*1="foo", save for later */
3444 					*index++ = 0;
3445 					work = g_malloc (sizeof (*work));
3446 					work->param.name = name;
3447 					work->param.value = value;
3448 					work->index = atoi (index);
3449 					if (split == NULL)
3450 						split = g_ptr_array_new ();
3451 					g_ptr_array_add (split, work);
3452 				}
3453 			} else {
3454 				last = header_append_param (last, name, value);
3455 			}
3456 		} else {
3457 			g_free (name);
3458 			g_free (value);
3459 		}
3460 
3461 		header_decode_lwsp (&inptr);
3462 	}
3463 
3464 	/* Rejoin any RFC 2184 split parameters in the proper order */
3465 	/* Parameters with the same index will be concatenated in undefined order */
3466 	if (split) {
3467 		GString *value = g_string_new ("");
3468 		struct _rfc2184_param *first;
3469 		gint i;
3470 
3471 		qsort (split->pdata, split->len, sizeof (split->pdata[0]), rfc2184_param_cmp);
3472 		first = split->pdata[0];
3473 		for (i = 0; i < split->len; i++) {
3474 			work = split->pdata[i];
3475 			if (split->len - 1 == i)
3476 				g_string_append (value, work->param.value);
3477 			if (split->len - 1 == i || strcmp (work->param.name, first->param.name) != 0) {
3478 				tmp = rfc2184_decode (value->str, value->len);
3479 				if (tmp == NULL)
3480 					tmp = g_strdup (value->str);
3481 
3482 				last = header_append_param (last, g_strdup (first->param.name), tmp);
3483 				g_string_truncate (value, 0);
3484 				first = work;
3485 			}
3486 			if (split->len - 1 != i)
3487 				g_string_append (value, work->param.value);
3488 		}
3489 		g_string_free (value, TRUE);
3490 		for (i = 0; i < split->len; i++) {
3491 			work = split->pdata[i];
3492 			g_free (work->param.name);
3493 			g_free (work->param.value);
3494 			g_free (work);
3495 		}
3496 		g_ptr_array_free (split, TRUE);
3497 	}
3498 
3499 	*in = inptr;
3500 
3501 	return head;
3502 }
3503 
3504 /**
3505  * camel_header_param_list_decode:
3506  * @in: (nullable): a header param value to decode
3507  *
3508  * Returns: (nullable) (transfer full): Decode list of parameters.
3509  *    Free with camel_header_param_list_free() when done with it.
3510  **/
3511 struct _camel_header_param *
camel_header_param_list_decode(const gchar * in)3512 camel_header_param_list_decode (const gchar *in)
3513 {
3514 	if (in == NULL)
3515 		return NULL;
3516 
3517 	return header_decode_param_list (&in);
3518 }
3519 
3520 static gchar *
header_encode_param(const guchar * in,gboolean * encoded,gboolean is_filename)3521 header_encode_param (const guchar *in,
3522                      gboolean *encoded,
3523                      gboolean is_filename)
3524 {
3525 	const guchar *inptr = in;
3526 	guchar *outbuf = NULL;
3527 	const gchar *charset;
3528 	GString *out;
3529 	guint32 c;
3530 
3531 	*encoded = FALSE;
3532 
3533 	g_return_val_if_fail (in != NULL, NULL);
3534 
3535 	if (is_filename) {
3536 		gchar *str;
3537 		if (!g_utf8_validate ((gchar *) inptr, -1, NULL)) {
3538 			GString *buff = g_string_new ("");
3539 
3540 			for (; inptr && *inptr; inptr++) {
3541 				if (*inptr < 32)
3542 					g_string_append_printf (buff, "%%%02X", (*inptr) & 0xFF);
3543 				else
3544 					g_string_append_c (buff, *inptr);
3545 			}
3546 
3547 			outbuf = (guchar *) g_string_free (buff, FALSE);
3548 			inptr = outbuf;
3549 		}
3550 
3551 		/* do not set encoded flag for file names */
3552 		str = header_encode_string_rfc2047 (inptr, TRUE);
3553 		g_free (outbuf);
3554 
3555 		return str;
3556 	}
3557 
3558 	/* if we have really broken utf8 passed in, we just treat it as binary data */
3559 
3560 	charset = camel_charset_best ((gchar *) in, strlen ((gchar *) in));
3561 	if (charset == NULL) {
3562 		return g_strdup ((gchar *) in);
3563 	}
3564 
3565 	if (g_ascii_strcasecmp (charset, "UTF-8") != 0) {
3566 		if ((outbuf = (guchar *) header_convert (charset, "UTF-8", (const gchar *) in, strlen ((gchar *) in))))
3567 			inptr = outbuf;
3568 		else
3569 			return g_strdup ((gchar *) in);
3570 	}
3571 
3572 	/* FIXME: set the 'language' as well, assuming we can get that info...? */
3573 	out = g_string_new (charset);
3574 	g_string_append (out, "''");
3575 
3576 	while ((c = *inptr++)) {
3577 		if (camel_mime_is_attrchar (c))
3578 			g_string_append_c (out, c);
3579 		else
3580 			g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
3581 	}
3582 	g_free (outbuf);
3583 	*encoded = TRUE;
3584 
3585 	return g_string_free (out, FALSE);
3586 }
3587 
/* HACK: Set to non-zero to have the 'filename' and 'name' parameters encoded the RFC 2047
 * way; otherwise they are encoded the correct RFC 2231 way. This exists because Outlook
 * and GMail do not understand the correct standard and refuse attachments with a localized
 * name sent from Evolution. This seems to have been fixed in Exchange 2007 at least - not
 * sure about standalone Outlook. */
gint camel_header_param_encode_filenames_in_rfc_2047 = 0;
3594 
3595 void
camel_header_param_list_format_append(GString * out,struct _camel_header_param * p)3596 camel_header_param_list_format_append (GString *out,
3597                                        struct _camel_header_param *p)
3598 {
3599 	gint used = out->len;
3600 
3601 	while (p) {
3602 		gboolean is_filename = camel_header_param_encode_filenames_in_rfc_2047 && (g_ascii_strcasecmp (p->name, "filename") == 0 || g_ascii_strcasecmp (p->name, "name") == 0);
3603 		gboolean encoded = FALSE;
3604 		gboolean quote = FALSE;
3605 		gint here = out->len;
3606 		gsize nlen, vlen;
3607 		gchar *value;
3608 
3609 		if (!p->value) {
3610 			p = p->next;
3611 			continue;
3612 		}
3613 
3614 		value = header_encode_param ((guchar *) p->value, &encoded, is_filename);
3615 		if (!value) {
3616 			w (g_warning ("appending parameter %s=%s violates rfc2184", p->name, p->value));
3617 			value = g_strdup (p->value);
3618 		}
3619 
3620 		if (!encoded) {
3621 			gchar *ch;
3622 
3623 			for (ch = value; ch && *ch; ch++) {
3624 				if (camel_mime_is_tspecial (*ch) || camel_mime_is_lwsp (*ch))
3625 					break;
3626 			}
3627 
3628 			quote = ch && *ch;
3629 		}
3630 
3631 		quote = quote || is_filename;
3632 		nlen = strlen (p->name);
3633 		vlen = strlen (value);
3634 
3635 		/* do not fold file names */
3636 		if (!is_filename && used + nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3637 			g_string_append (out, ";\n\t");
3638 			here = out->len;
3639 			used = 0;
3640 		} else
3641 			g_string_append (out, "; ");
3642 
3643 		if (!is_filename && nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3644 			/* we need to do special rfc2184 parameter wrapping */
3645 			gint maxlen = CAMEL_FOLD_SIZE - (nlen + 8);
3646 			gchar *inptr, *inend;
3647 			gint i = 0;
3648 
3649 			inptr = value;
3650 			inend = value + vlen;
3651 
3652 			while (inptr < inend) {
3653 				gchar *ptr = inptr + MIN (inend - inptr, maxlen);
3654 
3655 				if (encoded && ptr < inend) {
3656 					/* be careful not to break an encoded gchar (ie %20) */
3657 					gchar *q = ptr;
3658 					gint j = 2;
3659 
3660 					for (; j > 0 && q > inptr && *q != '%'; j--, q--);
3661 					if (*q == '%')
3662 						ptr = q;
3663 				}
3664 
3665 				if (i != 0) {
3666 					g_string_append (out, ";\n\t");
3667 					here = out->len;
3668 					used = 0;
3669 				}
3670 
3671 				g_string_append_printf (out, "%s*%d%s=", p->name, i++, encoded ? "*" : "");
3672 				if (encoded || !quote)
3673 					g_string_append_len (out, inptr, ptr - inptr);
3674 				else
3675 					quote_word (out, TRUE, inptr, ptr - inptr);
3676 
3677 				d (printf ("wrote: %s\n", out->str + here));
3678 
3679 				used += (out->len - here);
3680 
3681 				inptr = ptr;
3682 			}
3683 		} else {
3684 			g_string_append_printf (out, "%s%s=", p->name, encoded ? "*" : "");
3685 
3686 			/* Quote even if we don't need to in order to
3687 			 * work around broken mail software like the
3688 			 * Jive Forums' NNTP gateway */
3689 			if (encoded /*|| !quote */)
3690 				g_string_append (out, value);
3691 			else
3692 				quote_word (out, TRUE, value, vlen);
3693 
3694 			used += (out->len - here);
3695 		}
3696 
3697 		g_free (value);
3698 
3699 		p = p->next;
3700 	}
3701 }
3702 
3703 gchar *
camel_header_param_list_format(struct _camel_header_param * p)3704 camel_header_param_list_format (struct _camel_header_param *p)
3705 {
3706 	GString *out = g_string_new ("");
3707 
3708 	camel_header_param_list_format_append (out, p);
3709 	return g_string_free (out, FALSE);
3710 }
3711 
3712 CamelContentType *
camel_content_type_decode(const gchar * in)3713 camel_content_type_decode (const gchar *in)
3714 {
3715 	const gchar *inptr = in;
3716 	gchar *type, *subtype = NULL;
3717 	CamelContentType *t = NULL;
3718 
3719 	if (in == NULL)
3720 		return NULL;
3721 
3722 	type = decode_token (&inptr);
3723 	header_decode_lwsp (&inptr);
3724 	if (type) {
3725 		if  (*inptr == '/') {
3726 			inptr++;
3727 			subtype = decode_token (&inptr);
3728 		}
3729 		if (subtype == NULL && (!g_ascii_strcasecmp (type, "text"))) {
3730 			w (g_warning ("text type with no subtype, resorting to text/plain: %s", in));
3731 			subtype = g_strdup ("plain");
3732 		}
3733 		if (subtype == NULL) {
3734 			w (g_warning ("MIME type with no subtype: %s", in));
3735 		}
3736 
3737 		t = camel_content_type_new (type, subtype);
3738 		t->params = header_decode_param_list (&inptr);
3739 		g_free (type);
3740 		g_free (subtype);
3741 	} else {
3742 		g_free (type);
3743 		d (printf ("cannot find MIME type in header (2) '%s'", in));
3744 	}
3745 	return t;
3746 }
3747 
3748 void
camel_content_type_dump(CamelContentType * ct)3749 camel_content_type_dump (CamelContentType *ct)
3750 {
3751 	struct _camel_header_param *p;
3752 
3753 	printf ("Content-Type: ");
3754 	if (ct == NULL) {
3755 		printf ("<NULL>\n");
3756 		return;
3757 	}
3758 	printf ("%s / %s", ct->type, ct->subtype);
3759 	p = ct->params;
3760 	if (p) {
3761 		while (p) {
3762 			printf (";\n\t%s=\"%s\"", p->name, p->value);
3763 			p = p->next;
3764 		}
3765 	}
3766 	printf ("\n");
3767 }
3768 
3769 gchar *
camel_content_type_format(CamelContentType * ct)3770 camel_content_type_format (CamelContentType *ct)
3771 {
3772 	GString *out;
3773 
3774 	if (ct == NULL)
3775 		return NULL;
3776 
3777 	out = g_string_new ("");
3778 	if (ct->type == NULL) {
3779 		g_string_append_printf (out, "text/plain");
3780 		w (g_warning ("Content-Type with no main type"));
3781 	} else if (ct->subtype == NULL) {
3782 		w (g_warning ("Content-Type with no sub type: %s", ct->type));
3783 		if (!g_ascii_strcasecmp (ct->type, "multipart"))
3784 			g_string_append_printf (out, "%s/mixed", ct->type);
3785 		else
3786 			g_string_append_printf (out, "%s", ct->type);
3787 	} else {
3788 		g_string_append_printf (out, "%s/%s", ct->type, ct->subtype);
3789 	}
3790 	camel_header_param_list_format_append (out, ct->params);
3791 
3792 	return g_string_free (out, FALSE);
3793 }
3794 
3795 gchar *
camel_content_type_simple(CamelContentType * ct)3796 camel_content_type_simple (CamelContentType *ct)
3797 {
3798 	if (!ct)
3799 		return NULL;
3800 
3801 	if (ct->type == NULL) {
3802 		w (g_warning ("Content-Type with no main type"));
3803 		return g_strdup ("text/plain");
3804 	} else if (ct->subtype == NULL) {
3805 		w (g_warning ("Content-Type with no sub type: %s", ct->type));
3806 		if (!g_ascii_strcasecmp (ct->type, "multipart"))
3807 			return g_strdup_printf ("%s/mixed", ct->type);
3808 		else
3809 			return g_strdup (ct->type);
3810 	} else
3811 		return g_strdup_printf ("%s/%s", ct->type, ct->subtype);
3812 }
3813 
3814 gchar *
camel_content_transfer_encoding_decode(const gchar * in)3815 camel_content_transfer_encoding_decode (const gchar *in)
3816 {
3817 	if (in)
3818 		return decode_token (&in);
3819 
3820 	return NULL;
3821 }
3822 
3823 CamelContentDisposition *
camel_content_disposition_new(void)3824 camel_content_disposition_new (void)
3825 {
3826 	CamelContentDisposition *dd;
3827 
3828 	dd = g_malloc0 (sizeof (CamelContentDisposition));
3829 	dd->refcount = 1;
3830 	dd->disposition = NULL;
3831 	dd->params = NULL;
3832 
3833 	return dd;
3834 }
3835 
3836 CamelContentDisposition *
camel_content_disposition_decode(const gchar * in)3837 camel_content_disposition_decode (const gchar *in)
3838 {
3839 	CamelContentDisposition *d = NULL;
3840 	const gchar *inptr = in;
3841 
3842 	if (in == NULL)
3843 		return NULL;
3844 
3845 	d = camel_content_disposition_new ();
3846 	d->disposition = decode_token (&inptr);
3847 	if (d->disposition == NULL) {
3848 		w (g_warning ("Empty disposition type"));
3849 	}
3850 	d->params = header_decode_param_list (&inptr);
3851 	return d;
3852 }
3853 
3854 CamelContentDisposition *
camel_content_disposition_ref(CamelContentDisposition * d)3855 camel_content_disposition_ref (CamelContentDisposition *d)
3856 {
3857 	if (d)
3858 		d->refcount++;
3859 
3860 	return d;
3861 }
3862 
3863 void
camel_content_disposition_unref(CamelContentDisposition * d)3864 camel_content_disposition_unref (CamelContentDisposition *d)
3865 {
3866 	if (d) {
3867 		if (d->refcount <= 1) {
3868 			camel_header_param_list_free (d->params);
3869 			g_free (d->disposition);
3870 			g_free (d);
3871 		} else {
3872 			d->refcount--;
3873 		}
3874 	}
3875 }
3876 
3877 gchar *
camel_content_disposition_format(CamelContentDisposition * d)3878 camel_content_disposition_format (CamelContentDisposition *d)
3879 {
3880 	GString *out;
3881 
3882 	if (d == NULL)
3883 		return NULL;
3884 
3885 	out = g_string_new (d->disposition ? d->disposition : "attachment");
3886 	camel_header_param_list_format_append (out, d->params);
3887 
3888 	return g_string_free (out, FALSE);
3889 }
3890 
/* Convenience wrapper around camel_content_disposition_is_attachment_ex()
 * for callers which do not know the Content-Type of the parent part; it
 * passes NULL as the parent content type. */
gboolean
camel_content_disposition_is_attachment (const CamelContentDisposition *disposition,
					 const CamelContentType *content_type)
{
	return camel_content_disposition_is_attachment_ex (disposition, content_type, NULL);
}
3897 
3898 gboolean
camel_content_disposition_is_attachment_ex(const CamelContentDisposition * disposition,const CamelContentType * content_type,const CamelContentType * parent_content_type)3899 camel_content_disposition_is_attachment_ex (const CamelContentDisposition *disposition,
3900 					    const CamelContentType *content_type,
3901 					    const CamelContentType *parent_content_type)
3902 {
3903 	if (content_type && (
3904 	    camel_content_type_is (content_type, "application", "pkcs7-mime") ||
3905 	    camel_content_type_is (content_type, "application", "xpkcs7mime") ||
3906 	    camel_content_type_is (content_type, "application", "xpkcs7-mime") ||
3907 	    camel_content_type_is (content_type, "application", "x-pkcs7-mime")))
3908 		return FALSE;
3909 
3910 	if (content_type && (
3911 	    camel_content_type_is (content_type, "application", "pgp-encrypted")))
3912 		return !parent_content_type || !camel_content_type_is (parent_content_type, "multipart", "encrypted");
3913 
3914 	if (content_type && camel_content_type_is (content_type, "application", "octet-stream") &&
3915 	    parent_content_type && camel_content_type_is (parent_content_type, "multipart", "encrypted"))
3916 		return FALSE;
3917 
3918 	if (content_type && (
3919 	    camel_content_type_is (content_type, "application", "pkcs7-signature") ||
3920 	    camel_content_type_is (content_type, "application", "xpkcs7signature") ||
3921 	    camel_content_type_is (content_type, "application", "xpkcs7-signature") ||
3922 	    camel_content_type_is (content_type, "application", "x-pkcs7-signature") ||
3923 	    camel_content_type_is (content_type, "application", "pgp-signature")))
3924 		return !parent_content_type || !camel_content_type_is (parent_content_type, "multipart", "signed");
3925 
3926 	if (parent_content_type && content_type && camel_content_type_is (content_type, "message", "rfc822"))
3927 		return TRUE;
3928 
3929 	if (!disposition)
3930 		return FALSE;
3931 
3932 	if (disposition->disposition && g_ascii_strcasecmp (disposition->disposition, "attachment") == 0)
3933 		return TRUE;
3934 
3935 	/* If the Content-Disposition isn't an attachment, then call everything with a "filename"
3936 	   parameter an attachment, but only if there is no Content-Disposition header, or it's
3937 	   not the "inline" or it's neither text/... nor image/... Content-Type, which can be usually
3938 	   shown in the UI inline.
3939 
3940 	   The test for Content-Type was added for Apple Mail, which marks also for example .pdf
3941 	   attachments as 'inline', which broke the previous logic here.
3942 	*/
3943 	if (!disposition->disposition ||
3944 	    g_ascii_strcasecmp (disposition->disposition, "inline") != 0 ||
3945 	    (content_type && !camel_content_type_is (content_type, "text", "*") && !camel_content_type_is (content_type, "image", "*"))) {
3946 		const struct _camel_header_param *param;
3947 
3948 		for (param = disposition->params; param; param = param->next) {
3949 			if (param->name && param->value && *param->value && g_ascii_strcasecmp (param->name, "filename") == 0)
3950 				return TRUE;
3951 		}
3952 	}
3953 
3954 	return FALSE;
3955 }
3956 
/* date parser macros */

/* The character sets of the individual date token classes.
 * NOTE(review): presumably these are the sets camel_datetok_table was
 * generated from - confirm before changing either of them. */
#define NUMERIC_CHARS          "1234567890"
#define WEEKDAY_CHARS          "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
#define MONTH_CHARS            "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
#define TIMEZONE_ALPHA_CHARS   "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
#define TIME_CHARS             "1234567890:"

/* Bits of a date token mask.  A NON_* bit means that the token holds a
 * character which rules that token class out; the HAS_* bits mean that
 * the named character was seen. */
#define DATE_TOKEN_NON_NUMERIC          (1 << 0)
#define DATE_TOKEN_NON_WEEKDAY          (1 << 1)
#define DATE_TOKEN_NON_MONTH            (1 << 2)
#define DATE_TOKEN_NON_TIME             (1 << 3)
#define DATE_TOKEN_HAS_COLON            (1 << 4)
#define DATE_TOKEN_NON_TIMEZONE_ALPHA   (1 << 5)
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN             (1 << 7)

/* DATE_TOKEN_* bits of every possible byte value, indexed by the byte.
 * datetok() ORs together the entries of all characters of a token to get
 * the mask of that token. */
static guchar camel_datetok_table[256] = {
	128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
	 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
	111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
	 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
	111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
	107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
};
3992 
/* Named timezones understood by get_tzone().  The offset is written as a
 * signed decimal number of the form HHMM (hours * 100 + minutes), the same
 * form in which numeric "+HHMM" zones are decoded. */
static struct {
	const gchar *name;
	gint offset;
} tz_offsets[] = {
	{ "UT", 0 },
	{ "GMT", 0 },
	{ "EST", -500 },	/* these are all US timezones.  bloody yanks */
	{ "EDT", -400 },
	{ "CST", -600 },
	{ "CDT", -500 },
	{ "MST", -700 },
	{ "MDT", -600 },
	{ "PST", -800 },
	{ "PDT", -700 },
	{ "Z", 0 },
	{ "A", -100 },
	{ "M", -1200 },
	{ "N", 100 },
	{ "Y", 1200 },
};
4013 
/* Three-letter month names, indexed by the tm_mon value (0 = "Jan"). */
static const gchar tm_months[][4] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

/* Three-letter day names, indexed by the tm_wday value (0 = "Sun"). */
static const gchar tm_days[][4] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
4022 
4023 /**
4024  * camel_header_format_date:
4025  * @date: time_t date representation
4026  * @tz_offset: Timezone offset
4027  *
4028  * Allocates a string buffer containing the rfc822 formatted date
4029  * string represented by @time and @tz_offset.
4030  *
4031  * Returns: a valid string representation of the date.
4032  **/
4033 gchar *
camel_header_format_date(time_t date,gint tz_offset)4034 camel_header_format_date (time_t date,
4035                           gint tz_offset)
4036 {
4037 	struct tm tm;
4038 
4039 	d (printf ("offset = %d\n", tz_offset));
4040 
4041 	d (printf ("converting date %s", ctime (&date)));
4042 
4043 	date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
4044 
4045 	d (printf ("converting date %s", ctime (&date)));
4046 
4047 	gmtime_r (&date, &tm);
4048 
4049 	return g_strdup_printf (
4050 		"%s, %02d %s %04d %02d:%02d:%02d %+05d",
4051 		tm_days[tm.tm_wday],
4052 		tm.tm_mday,
4053 		tm_months[tm.tm_mon],
4054 		tm.tm_year + 1900,
4055 		tm.tm_hour,
4056 		tm.tm_min,
4057 		tm.tm_sec,
4058 		tz_offset);
4059 }
4060 
/* This is where it gets ugly... */

/* One token of a date string, as produced by datetok().  The token is not
 * copied: it is the @len characters beginning at @start inside the string
 * handed to datetok(), and it is not NUL-terminated at @len. */
struct _date_token {
	struct _date_token *next;	/* next token, NULL at the end of the list */
	guchar mask;			/* OR of the camel_datetok_table entries of its characters */
	const gchar *start;
	gsize len;
};
4069 
4070 static struct _date_token *
datetok(const gchar * date)4071 datetok (const gchar *date)
4072 {
4073 	struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens;
4074 	const gchar *start, *end;
4075 	guchar mask;
4076 
4077 	start = date;
4078 	while (*start) {
4079 		/* kill leading whitespace */
4080 		while (*start == ' ' || *start == '\t')
4081 			start++;
4082 
4083 		if (*start == '\0')
4084 			break;
4085 
4086 		mask = camel_datetok_table[(guchar) *start];
4087 
4088 		/* find the end of this token */
4089 		end = start + 1;
4090 		while (*end && !strchr ("-/,\t\r\n ", *end))
4091 			mask |= camel_datetok_table[(guchar) *end++];
4092 
4093 		if (end != start) {
4094 			token = g_malloc (sizeof (struct _date_token));
4095 			token->next = NULL;
4096 			token->start = start;
4097 			token->len = end - start;
4098 			token->mask = mask;
4099 
4100 			tail->next = token;
4101 			tail = token;
4102 		}
4103 
4104 		if (*end)
4105 			start = end + 1;
4106 		else
4107 			break;
4108 	}
4109 
4110 	return tokens;
4111 }
4112 
4113 static gint
decode_int(const gchar * in,gsize inlen)4114 decode_int (const gchar *in,
4115             gsize inlen)
4116 {
4117 	register const gchar *inptr;
4118 	gint sign = 1, val = 0;
4119 	const gchar *inend;
4120 
4121 	inptr = in;
4122 	inend = in + inlen;
4123 
4124 	if (*inptr == '-') {
4125 		sign = -1;
4126 		inptr++;
4127 	} else if (*inptr == '+')
4128 		inptr++;
4129 
4130 	for (; inptr < inend; inptr++) {
4131 		if (!(*inptr >= '0' && *inptr <= '9'))
4132 			return -1;
4133 		else
4134 			val = (val * 10) + (*inptr - '0');
4135 	}
4136 
4137 	val *= sign;
4138 
4139 	return val;
4140 }
4141 
/* Compiled out with #if 0, so nothing below is part of the build. */
#if 0
/* Returns the number of days of @month, which is counted from 1, in @year;
 * 0 for a @month outside of 1..12. */
static gint
get_days_in_month (gint month,
                   gint year)
{
	switch (month) {
	case 1:
	case 3:
	case 5:
	case 7:
	case 8:
	case 10:
	case 12:
		return 31;
	case 4:
	case 6:
	case 9:
	case 11:
		return 30;
	case 2:
		if (g_date_is_leap_year (year))
			return 29;
		else
			return 28;
	default:
		return 0;
	}
}
#endif
4171 
4172 static gint
get_wday(const gchar * in,gsize inlen)4173 get_wday (const gchar *in,
4174           gsize inlen)
4175 {
4176 	gint wday;
4177 
4178 	g_return_val_if_fail (in != NULL, -1);
4179 
4180 	if (inlen < 3)
4181 		return -1;
4182 
4183 	for (wday = 0; wday < 7; wday++) {
4184 		if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
4185 			return wday;
4186 	}
4187 
4188 	return -1;  /* unknown week day */
4189 }
4190 
4191 static gint
get_mday(const gchar * in,gsize inlen)4192 get_mday (const gchar *in,
4193           gsize inlen)
4194 {
4195 	gint mday;
4196 
4197 	g_return_val_if_fail (in != NULL, -1);
4198 
4199 	mday = decode_int (in, inlen);
4200 
4201 	if (mday < 0 || mday > 31)
4202 		mday = -1;
4203 
4204 	return mday;
4205 }
4206 
4207 static gint
get_month(const gchar * in,gsize inlen)4208 get_month (const gchar *in,
4209            gsize inlen)
4210 {
4211 	gint i;
4212 
4213 	g_return_val_if_fail (in != NULL, -1);
4214 
4215 	if (inlen < 3)
4216 		return -1;
4217 
4218 	for (i = 0; i < 12; i++) {
4219 		if (!g_ascii_strncasecmp (in, tm_months[i], 3))
4220 			return i;
4221 	}
4222 
4223 	return -1;  /* unknown month */
4224 }
4225 
4226 static gint
get_year(const gchar * in,gsize inlen)4227 get_year (const gchar *in,
4228           gsize inlen)
4229 {
4230 	gint year;
4231 
4232 	g_return_val_if_fail (in != NULL, -1);
4233 
4234 	if ((year = decode_int (in, inlen)) == -1)
4235 		return -1;
4236 
4237 	if (year < 100)
4238 		year += (year < 70) ? 2000 : 1900;
4239 
4240 	if (year < 1969)
4241 		return -1;
4242 
4243 	return year;
4244 }
4245 
4246 static gboolean
get_time(const gchar * in,gsize inlen,gint * hour,gint * min,gint * sec)4247 get_time (const gchar *in,
4248           gsize inlen,
4249           gint *hour,
4250           gint *min,
4251           gint *sec)
4252 {
4253 	register const gchar *inptr;
4254 	gint *val, colons = 0;
4255 	const gchar *inend;
4256 
4257 	*hour = *min = *sec = 0;
4258 
4259 	inend = in + inlen;
4260 	val = hour;
4261 	for (inptr = in; inptr < inend; inptr++) {
4262 		if (*inptr == ':') {
4263 			colons++;
4264 			switch (colons) {
4265 			case 1:
4266 				val = min;
4267 				break;
4268 			case 2:
4269 				val = sec;
4270 				break;
4271 			default:
4272 				return FALSE;
4273 			}
4274 		} else if (!(*inptr >= '0' && *inptr <= '9'))
4275 			return FALSE;
4276 		else
4277 			*val = (*val * 10) + (*inptr - '0');
4278 	}
4279 
4280 	return TRUE;
4281 }
4282 
4283 static gint
get_tzone(struct _date_token ** token)4284 get_tzone (struct _date_token **token)
4285 {
4286 	const gchar *inptr, *inend;
4287 	gsize inlen;
4288 	gint i, t;
4289 
4290 	for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
4291 		inptr = (*token)->start;
4292 		inlen = (*token)->len;
4293 		inend = inptr + inlen;
4294 
4295 		if (*inptr == '+' || *inptr == '-') {
4296 			return decode_int (inptr, inlen);
4297 		} else {
4298 			if (*inptr == '(') {
4299 				inptr++;
4300 				if (*(inend - 1) == ')')
4301 					inlen -= 2;
4302 				else
4303 					inlen--;
4304 			}
4305 
4306 			for (t = 0; t < 15; t++) {
4307 				gsize len = strlen (tz_offsets[t].name);
4308 
4309 				if (len != inlen)
4310 					continue;
4311 
4312 				if (!strncmp (inptr, tz_offsets[t].name, len))
4313 					return tz_offsets[t].offset;
4314 			}
4315 		}
4316 	}
4317 
4318 	return -1;
4319 }
4320 
4321 static time_t
parse_rfc822_date(struct _date_token * tokens,gint * tzone)4322 parse_rfc822_date (struct _date_token *tokens,
4323                    gint *tzone)
4324 {
4325 	gint hour, min, sec, offset, n;
4326 	struct _date_token *token;
4327 	struct tm tm;
4328 	time_t t;
4329 
4330 	g_return_val_if_fail (tokens != NULL, (time_t) 0);
4331 
4332 	token = tokens;
4333 
4334 	memset ((gpointer) &tm, 0, sizeof (struct tm));
4335 
4336 	if ((n = get_wday (token->start, token->len)) != -1) {
4337 		/* not all dates may have this... */
4338 		tm.tm_wday = n;
4339 		token = token->next;
4340 	}
4341 
4342 	/* get the mday */
4343 	if (!token || (n = get_mday (token->start, token->len)) == -1)
4344 		return (time_t) 0;
4345 
4346 	tm.tm_mday = n;
4347 	token = token->next;
4348 
4349 	/* get the month */
4350 	if (!token || (n = get_month (token->start, token->len)) == -1)
4351 		return (time_t) 0;
4352 
4353 	tm.tm_mon = n;
4354 	token = token->next;
4355 
4356 	/* get the year */
4357 	if (!token || (n = get_year (token->start, token->len)) == -1)
4358 		return (time_t) 0;
4359 
4360 	tm.tm_year = n - 1900;
4361 	token = token->next;
4362 
4363 	/* get the hour/min/sec */
4364 	if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
4365 		return (time_t) 0;
4366 
4367 	tm.tm_hour = hour;
4368 	tm.tm_min = min;
4369 	tm.tm_sec = sec;
4370 	token = token->next;
4371 
4372 	if (token && token->start && (
4373 	    g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4374 	    g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4375 		/* not a valid RFC 822 time representation */
4376 		return 0;
4377 	}
4378 
4379 	/* get the timezone */
4380 	if (!token || (n = get_tzone (&token)) == -1) {
4381 		/* I guess we assume tz is GMT? */
4382 		offset = 0;
4383 	} else {
4384 		offset = n;
4385 	}
4386 
4387 	t = camel_mktime_utc (&tm);
4388 
4389 	/* t is now GMT of the time we want, but not offset by the timezone ... */
4390 
4391 	/* this should convert the time to the GMT equiv time */
4392 	t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4393 
4394 	if (tzone)
4395 		*tzone = offset;
4396 
4397 	return t;
4398 }
4399 
/*
 * Classification predicates over the mask field of a struct _date_token.
 * Each category test succeeds when the matching DATE_TOKEN_NON_* bit is
 * clear; is_time() additionally needs DATE_TOKEN_HAS_COLON and
 * is_tzone_numeric() additionally needs DATE_TOKEN_HAS_SIGN.
 * The argument is parenthesised before the cast so that any pointer
 * expression can be passed safely.
 */
#define date_token_mask(t)  (((struct _date_token *) (t))->mask)
#define is_numeric(t)       ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
#define is_weekday(t)       ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
#define is_month(t)         ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
#define is_time(t)          (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
#define is_tzone_alpha(t)   ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
#define is_tzone(t)         (is_tzone_alpha (t) || is_tzone_numeric (t))
4408 
4409 static time_t
parse_broken_date(struct _date_token * tokens,gint * tzone)4410 parse_broken_date (struct _date_token *tokens,
4411                    gint *tzone)
4412 {
4413 	gboolean got_wday, got_month, got_tzone, is_pm;
4414 	gint hour, min, sec, offset, n;
4415 	struct _date_token *token;
4416 	struct tm tm;
4417 	time_t t;
4418 
4419 	memset ((gpointer) &tm, 0, sizeof (struct tm));
4420 	got_wday = got_month = got_tzone = FALSE;
4421 	is_pm = FALSE;
4422 	offset = 0;
4423 
4424 	token = tokens;
4425 	while (token) {
4426 		if (is_weekday (token) && !got_wday) {
4427 			if ((n = get_wday (token->start, token->len)) != -1) {
4428 				d (printf ("weekday; "));
4429 				got_wday = TRUE;
4430 				tm.tm_wday = n;
4431 				goto next;
4432 			}
4433 		}
4434 
4435 		if (is_month (token) && !got_month) {
4436 			if ((n = get_month (token->start, token->len)) != -1) {
4437 				d (printf ("month; "));
4438 				got_month = TRUE;
4439 				tm.tm_mon = n;
4440 				goto next;
4441 			}
4442 		}
4443 
4444 		if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
4445 			if (get_time (token->start, token->len, &hour, &min, &sec)) {
4446 				d (printf ("time; "));
4447 				tm.tm_hour = hour;
4448 				tm.tm_min = min;
4449 				tm.tm_sec = sec;
4450 				goto next;
4451 			}
4452 		}
4453 
4454 		if (!got_tzone && token->start && (
4455 		    g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4456 		    g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4457 			is_pm = g_ascii_strncasecmp (token->start, "PM", 2) == 0;
4458 
4459 			goto next;
4460 		}
4461 
4462 		if (is_tzone (token) && !got_tzone) {
4463 			struct _date_token *t = token;
4464 
4465 			if ((n = get_tzone (&t)) != -1) {
4466 				d (printf ("tzone; "));
4467 				got_tzone = TRUE;
4468 				offset = n;
4469 				goto next;
4470 			}
4471 		}
4472 
4473 		if (is_numeric (token)) {
4474 			if (token->len == 4 && !tm.tm_year) {
4475 				if ((n = get_year (token->start, token->len)) != -1) {
4476 					d (printf ("year; "));
4477 					tm.tm_year = n - 1900;
4478 					goto next;
4479 				}
4480 			} else {
4481 				/* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
4482 				if (!got_month && token->next && is_numeric (token->next)) {
4483 					if ((n = decode_int (token->start, token->len)) > 12) {
4484 						goto mday;
4485 					} else if (n > 0) {
4486 						d (printf ("mon; "));
4487 						got_month = TRUE;
4488 						tm.tm_mon = n - 1;
4489 					}
4490 					goto next;
4491 				} else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
4492 				mday:
4493 					d (printf ("mday; "));
4494 					tm.tm_mday = n;
4495 					goto next;
4496 				} else if (!tm.tm_year) {
4497 					if ((n = get_year (token->start, token->len)) != -1) {
4498 						d (printf ("2-digit year; "));
4499 						tm.tm_year = n - 1900;
4500 					}
4501 					goto next;
4502 				}
4503 			}
4504 		}
4505 
4506 		d (printf ("???; "));
4507 
4508 	next:
4509 
4510 		token = token->next;
4511 	}
4512 
4513 	d (printf ("\n"));
4514 
4515 	t = camel_mktime_utc (&tm);
4516 
4517 	/* t is now GMT of the time we want, but not offset by the timezone ... */
4518 
4519 	/* this should convert the time to the GMT equiv time */
4520 	t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4521 
4522 	if (is_pm)
4523 		t += 12 * 60 * 60;
4524 
4525 	if (tzone)
4526 		*tzone = offset;
4527 
4528 	return t;
4529 }
4530 
4531 /**
4532  * camel_header_decode_date:
4533  * @str: input date string
4534  * @tz_offset: timezone offset
4535  *
4536  * Decodes the rfc822 date string and saves the GMT offset into
4537  * @tz_offset if non-NULL.
4538  *
4539  * Returns: the time_t representation of the date string specified by
4540  * @str or (time_t) 0 on error. If @tz_offset is non-NULL, the value
4541  * of the timezone offset will be stored.
4542  **/
4543 time_t
camel_header_decode_date(const gchar * str,gint * tz_offset)4544 camel_header_decode_date (const gchar *str,
4545                           gint *tz_offset)
4546 {
4547 	struct _date_token *token, *tokens;
4548 	time_t date;
4549 
4550 	if (!str || !(tokens = datetok (str))) {
4551 		if (tz_offset)
4552 			*tz_offset = 0;
4553 
4554 		return (time_t) 0;
4555 	}
4556 
4557 	if (!(date = parse_rfc822_date (tokens, tz_offset)))
4558 		date = parse_broken_date (tokens, tz_offset);
4559 
4560 	/* cleanup */
4561 	while (tokens) {
4562 		token = tokens;
4563 		tokens = tokens->next;
4564 		g_free (token);
4565 	}
4566 
4567 	return date;
4568 }
4569 
4570 gchar *
camel_header_location_decode(const gchar * in)4571 camel_header_location_decode (const gchar *in)
4572 {
4573 	gint quote = 0;
4574 	GString *out = g_string_new ("");
4575 	gchar c;
4576 
4577 	/* Sigh. RFC2557 says:
4578 	 *   content-location =   "Content-Location:" [CFWS] URI [CFWS]
4579 	 *      where URI is restricted to the syntax for URLs as
4580 	 *      defined in Uniform Resource Locators [URL] until
4581 	 *      IETF specifies other kinds of URIs.
4582 	 *
4583 	 * But Netscape puts quotes around the URI when sending web
4584 	 * pages.
4585 	 *
4586 	 * Which is required as defined in rfc2017 [3.1].  Although
4587 	 * outlook doesn't do this.
4588 	 *
4589 	 * Since we get headers already unfolded, we need just drop
4590 	 * all whitespace.  URL's cannot contain whitespace or quoted
4591 	 * characters, even when included in quotes.
4592 	 */
4593 
4594 	header_decode_lwsp (&in);
4595 	if (*in == '"') {
4596 		in++;
4597 		quote = 1;
4598 	}
4599 
4600 	while ((c = *in++)) {
4601 		if (quote && c == '"')
4602 			break;
4603 		if (!camel_mime_is_lwsp (c))
4604 			g_string_append_c (out, c);
4605 	}
4606 
4607 	return g_string_free (out, FALSE);
4608 }
4609 
/**
 * camel_header_msgid_generate:
 * @domain: domain to use (like "example.com") for the ID suffix; can be NULL
 *
 * Either the @domain is used, or the user's local hostname,
 * in case it's NULL or empty. The local hostname is looked up once and
 * cached for later calls.
 *
 * The part before the '@' is the SHA-1 of several time, process, counter,
 * host and user values, followed by ".camel".
 *
 * Returns: Unique message ID. The caller owns the returned string.
 **/
gchar *
camel_header_msgid_generate (const gchar *domain)
{
	static GMutex count_lock;
#define LOOKUP_LOCK() g_mutex_lock (&count_lock)
#define LOOKUP_UNLOCK() g_mutex_unlock (&count_lock)
	static volatile gint counter = 0;
	static gchar *cached_hostname = NULL;
	struct addrinfo *ai = NULL;
	GChecksum *checksum;
	gchar *msgid;

	/* Treat an empty domain like a missing one on every call, not only on
	 * the call that fills the hostname cache; otherwise a later call with
	 * "" would produce an ID with nothing after the '@'. */
	if (domain && !*domain)
		domain = NULL;

	LOOKUP_LOCK ();
	if (!cached_hostname && !domain) {
		gchar host[MAXHOSTNAMELEN];
		struct addrinfo hints = { 0 };
		const gchar *name;
		gint retval;

		retval = gethostname (host, sizeof (host));
		if (retval == 0 && *host) {
			/* prefer the canonical name when the resolver provides one */
			hints.ai_flags = AI_CANONNAME;
			ai = camel_getaddrinfo (
				host, NULL, &hints, NULL, NULL);
			if (ai && ai->ai_canonname)
				name = ai->ai_canonname;
			else
				name = host;
		} else
			name = "localhost.localdomain";

		cached_hostname = g_strdup (name);
	}

	checksum = g_checksum_new (G_CHECKSUM_SHA1);

	#define add_i64(_x) G_STMT_START { \
		gint64 i64 = (_x); \
		g_checksum_update (checksum, (const guchar *) &i64, sizeof (gint64)); \
	} G_STMT_END

	#define add_str(_x, _def) G_STMT_START { \
		const gchar *str = (_x); \
		if (!str) \
			str = (_def); \
		g_checksum_update (checksum, (const guchar *) str, strlen (str)); \
	} G_STMT_END

	add_i64 (g_get_monotonic_time ());
	add_i64 (g_get_real_time ());
	add_i64 (getpid ());
	add_i64 (getgid ());
	add_i64 (getppid ());
	add_i64 (g_atomic_int_add (&counter, 1));

	add_str (domain, "localhost");
	add_str (cached_hostname, "localhost");
	add_str (g_get_host_name (), "localhost");
	add_str (g_get_user_name (), "user");
	add_str (g_get_real_name (), "User");

	#undef add_i64
	#undef add_str

	msgid = g_strdup_printf ("%s.camel@%s", g_checksum_get_string (checksum), domain ? domain : cached_hostname);

	g_checksum_free (checksum);

	LOOKUP_UNLOCK ();

	/* the canonical name was copied into cached_hostname above */
	if (ai)
		camel_freeaddrinfo (ai);

	return msgid;
}
4696 
/*
 * Table of headers that can identify a mailing list.
 *
 *   name:    header to look up
 *   pattern: regular expression applied to the header value; it is
 *            compiled by mailing_list_init() with REG_EXTENDED | REG_ICASE
 *   regex:   storage for the compiled form of pattern
 *
 * camel_headers_dup_mailing_list() walks the entries in order and uses
 * the first one that matches: capture group 1 is taken as the list name
 * and capture group 2, when not empty, is appended after an '@'.
 * The comments before each entry show sample header lines it is meant for.
 */
static struct {
	const gchar *name;
	const gchar *pattern;
	regex_t regex;
} mail_list_magic[] = {
	/* List-Post: <mailto:gnome-hackers@gnome.org> */
	/* List-Post: <mailto:gnome-hackers> */
	{ "List-Post", "[ \t]*<mailto:([^@>]+)@?([^ \n\t\r>]*)" },
	/* List-Id: GNOME stuff <gnome-hackers.gnome.org> */
	/* List-Id: <gnome-hackers.gnome.org> */
	/* List-Id: <gnome-hackers> */
	/* This old one wasn't very useful: { "List-Id", " *([^<]+)" },*/
	{ "List-Id", "[^<]*<([^\\.>]+)\\.?([^ \n\t\r>]*)" },
	/* Mailing-List: list gnome-hackers@gnome.org; contact gnome-hackers-owner@gnome.org */
	{ "Mailing-List", "[ \t]*list ([^@]+)@?([^ \n\t\r>;]*)" },
	/* Originator: gnome-hackers@gnome.org */
	{ "Originator", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* X-Mailing-List: <gnome-hackers@gnome.org> archive/latest/100 */
	/* X-Mailing-List: gnome-hackers@gnome.org */
	/* X-Mailing-List: gnome-hackers */
	/* X-Mailing-List: <gnome-hackers> */
	{ "X-Mailing-List", "[ \t]*<?([^@>]+)@?([^ \n\t\r>]*)" },
	/* X-Loop: gnome-hackers@gnome.org */
	{ "X-Loop", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* X-List: gnome-hackers */
	/* X-List: gnome-hackers@gnome.org */
	{ "X-List", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* Sender: owner-gnome-hackers@gnome.org */
	/* Sender: owner-gnome-hackers */
	{ "Sender", "[ \t]*owner-([^@]+)@?([^ @\n\t\r>]*)" },
	/* Sender: gnome-hackers-owner@gnome.org */
	/* Sender: gnome-hackers-owner */
	{ "Sender", "[ \t]*([^@]+)-owner@?([^ @\n\t\r>]*)" },
	/* Delivered-To: mailing list gnome-hackers@gnome.org */
	/* Delivered-To: mailing list gnome-hackers */
	{ "Delivered-To", "[ \t]*mailing list ([^@]+)@?([^ \n\t\r>]*)" },
	/* Return-Path: owner-gnome-hackers@gnome.org */
	/* Return-Path: <owner-gnome-hackers@gnome.org> */
	/* Return-Path: owner-gnome-hackers */
	/* Return-Path: <owner-gnome-hackers> */
	{ "Return-Path", "[ \t]*<?owner-([^@>]+)@?([^ \n\t\r>]*)" },
	/* X-BeenThere: gnome-hackers@gnome.org */
	/* X-BeenThere: gnome-hackers */
	{ "X-BeenThere", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* List-Unsubscribe:  <mailto:gnome-hackers-unsubscribe@gnome.org> */
	{ "List-Unsubscribe", "<mailto:(.+)-unsubscribe@([^ \n\t\r>]*)" },
};
4744 
4745 static gpointer
mailing_list_init(gpointer param)4746 mailing_list_init (gpointer param)
4747 {
4748 	gint i, errcode, failed = 0;
4749 
4750 	/* precompile regex's for speed at runtime */
4751 	for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4752 		errcode = regcomp (&mail_list_magic[i].regex, mail_list_magic[i].pattern, REG_EXTENDED | REG_ICASE);
4753 		if (errcode != 0) {
4754 			gchar *errstr;
4755 			gsize len;
4756 
4757 			len = regerror (errcode, &mail_list_magic[i].regex, NULL, 0);
4758 			errstr = g_malloc0 (len + 1);
4759 			regerror (errcode, &mail_list_magic[i].regex, errstr, len);
4760 
4761 			g_warning ("Internal error, compiling regex failed: %s: %s", mail_list_magic[i].pattern, errstr);
4762 			g_free (errstr);
4763 			failed++;
4764 		}
4765 	}
4766 
4767 	g_warn_if_fail (failed == 0);
4768 
4769 	return NULL;
4770 }
4771 
4772 /**
4773  * camel_headers_dup_mailing_list:
4774  * @headers: a #CamelNameValueArray with headers
4775  *
4776  * Searches for a mailing list information among known headers and returns
4777  * a newly allocated string with its value.
4778  *
4779  * Returns: (nullable) (transfer full): The mailing list header, or %NULL, if none is found
4780  **/
4781 gchar *
camel_headers_dup_mailing_list(const CamelNameValueArray * headers)4782 camel_headers_dup_mailing_list (const CamelNameValueArray *headers)
4783 {
4784 	static GOnce once = G_ONCE_INIT;
4785 	const gchar *v;
4786 	regmatch_t match[3];
4787 	gint i, j;
4788 
4789 	g_once (&once, mailing_list_init, NULL);
4790 
4791 	for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4792 		v = camel_name_value_array_get_named (headers, CAMEL_COMPARE_CASE_INSENSITIVE, mail_list_magic[i].name);
4793 		for (j = 0; j < 3; j++) {
4794 			match[j].rm_so = -1;
4795 			match[j].rm_eo = -1;
4796 		}
4797 		if (v != NULL && regexec (&mail_list_magic[i].regex, v, 3, match, 0) == 0 && match[1].rm_so != -1) {
4798 			gint len1, len2;
4799 			gchar *mlist;
4800 
4801 			len1 = match[1].rm_eo - match[1].rm_so;
4802 			len2 = match[2].rm_eo - match[2].rm_so;
4803 
4804 			mlist = g_malloc (len1 + len2 + 2);
4805 			memcpy (mlist, v + match[1].rm_so, len1);
4806 			if (len2) {
4807 				mlist[len1] = '@';
4808 				memcpy (mlist + len1 + 1, v + match[2].rm_so, len2);
4809 				mlist[len1 + len2 + 1] = '\0';
4810 			} else {
4811 				mlist[len1] = '\0';
4812 			}
4813 
4814 			return mlist;
4815 		}
4816 	}
4817 
4818 	return NULL;
4819 }
4820 
4821 /* ok, here's the address stuff, what a mess ... */
4822 CamelHeaderAddress *
camel_header_address_new(void)4823 camel_header_address_new (void)
4824 {
4825 	CamelHeaderAddress *h;
4826 	h = g_malloc0 (sizeof (*h));
4827 	h->type = CAMEL_HEADER_ADDRESS_NONE;
4828 	h->refcount = 1;
4829 	return h;
4830 }
4831 
4832 CamelHeaderAddress *
camel_header_address_new_name(const gchar * name,const gchar * addr)4833 camel_header_address_new_name (const gchar *name,
4834                                const gchar *addr)
4835 {
4836 	CamelHeaderAddress *h;
4837 	h = camel_header_address_new ();
4838 	h->type = CAMEL_HEADER_ADDRESS_NAME;
4839 	h->name = g_strdup (name);
4840 	h->v.addr = g_strdup (addr);
4841 	return h;
4842 }
4843 
4844 CamelHeaderAddress *
camel_header_address_new_group(const gchar * name)4845 camel_header_address_new_group (const gchar *name)
4846 {
4847 	CamelHeaderAddress *h;
4848 
4849 	h = camel_header_address_new ();
4850 	h->type = CAMEL_HEADER_ADDRESS_GROUP;
4851 	h->name = g_strdup (name);
4852 	return h;
4853 }
4854 
4855 CamelHeaderAddress *
camel_header_address_ref(CamelHeaderAddress * addrlist)4856 camel_header_address_ref (CamelHeaderAddress *addrlist)
4857 {
4858 	if (addrlist)
4859 		addrlist->refcount++;
4860 
4861 	return addrlist;
4862 }
4863 
4864 void
camel_header_address_unref(CamelHeaderAddress * addrlist)4865 camel_header_address_unref (CamelHeaderAddress *addrlist)
4866 {
4867 	if (addrlist) {
4868 		if (addrlist->refcount <= 1) {
4869 			if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP) {
4870 				camel_header_address_list_clear (&addrlist->v.members);
4871 			} else if (addrlist->type == CAMEL_HEADER_ADDRESS_NAME) {
4872 				g_free (addrlist->v.addr);
4873 			}
4874 			g_free (addrlist->name);
4875 			g_free (addrlist);
4876 		} else {
4877 			addrlist->refcount--;
4878 		}
4879 	}
4880 }
4881 
4882 void
camel_header_address_set_name(CamelHeaderAddress * addrlist,const gchar * name)4883 camel_header_address_set_name (CamelHeaderAddress *addrlist,
4884                                const gchar *name)
4885 {
4886 	if (addrlist) {
4887 		g_free (addrlist->name);
4888 		addrlist->name = g_strdup (name);
4889 	}
4890 }
4891 
4892 void
camel_header_address_set_addr(CamelHeaderAddress * addrlist,const gchar * addr)4893 camel_header_address_set_addr (CamelHeaderAddress *addrlist,
4894                                const gchar *addr)
4895 {
4896 	if (addrlist) {
4897 		if (addrlist->type == CAMEL_HEADER_ADDRESS_NAME
4898 		    || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4899 			addrlist->type = CAMEL_HEADER_ADDRESS_NAME;
4900 			g_free (addrlist->v.addr);
4901 			addrlist->v.addr = g_strdup (addr);
4902 		} else {
4903 			g_warning ("Trying to set the address on a group");
4904 		}
4905 	}
4906 }
4907 
4908 /**
4909  * camel_header_address_set_members:
4910  * @addrlist: a #CamelHeaderAddress object
4911  * @group: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress
4912  *
4913  * TODO: Document me.
4914  *
4915  **/
4916 void
camel_header_address_set_members(CamelHeaderAddress * addrlist,CamelHeaderAddress * group)4917 camel_header_address_set_members (CamelHeaderAddress *addrlist,
4918                                   CamelHeaderAddress *group)
4919 {
4920 	if (addrlist) {
4921 		if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP
4922 		    || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4923 			addrlist->type = CAMEL_HEADER_ADDRESS_GROUP;
4924 			camel_header_address_list_clear (&addrlist->v.members);
4925 			/* should this ref them? */
4926 			addrlist->v.members = group;
4927 		} else {
4928 			g_warning ("Trying to set the members on a name, not group");
4929 		}
4930 	}
4931 }
4932 
4933 void
camel_header_address_add_member(CamelHeaderAddress * addrlist,CamelHeaderAddress * member)4934 camel_header_address_add_member (CamelHeaderAddress *addrlist,
4935                                  CamelHeaderAddress *member)
4936 {
4937 	if (addrlist) {
4938 		if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP
4939 		    || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4940 			addrlist->type = CAMEL_HEADER_ADDRESS_GROUP;
4941 			camel_header_address_list_append (&addrlist->v.members, member);
4942 		}
4943 	}
4944 }
4945 
4946 /**
4947  * camel_header_address_list_append_list:
4948  * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4949  * @addrs: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress to add
4950  *
4951  * TODO: Document me.
4952  *
4953  **/
4954 void
camel_header_address_list_append_list(CamelHeaderAddress ** addrlistp,CamelHeaderAddress ** addrs)4955 camel_header_address_list_append_list (CamelHeaderAddress **addrlistp,
4956                                        CamelHeaderAddress **addrs)
4957 {
4958 	if (addrlistp) {
4959 		CamelHeaderAddress *n = (CamelHeaderAddress *) addrlistp;
4960 
4961 		while (n->next)
4962 			n = n->next;
4963 		n->next = *addrs;
4964 	}
4965 }
4966 
4967 /**
4968  * camel_header_address_list_append:
4969  * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4970  * @addr: the #CamelHeaderAddress to add
4971  *
4972  * TODO: Document me.
4973  *
4974  **/
4975 void
camel_header_address_list_append(CamelHeaderAddress ** addrlistp,CamelHeaderAddress * addr)4976 camel_header_address_list_append (CamelHeaderAddress **addrlistp,
4977                                   CamelHeaderAddress *addr)
4978 {
4979 	if (addr) {
4980 		camel_header_address_list_append_list (addrlistp, &addr);
4981 		addr->next = NULL;
4982 	}
4983 }
4984 
4985 /**
4986  * camel_header_address_list_clear:
4987  * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4988  *
4989  * TODO: Document me.
4990  *
4991  **/
4992 void
camel_header_address_list_clear(CamelHeaderAddress ** addrlistp)4993 camel_header_address_list_clear (CamelHeaderAddress **addrlistp)
4994 {
4995 	CamelHeaderAddress *a, *n;
4996 	a = *addrlistp;
4997 	while (a) {
4998 		n = a->next;
4999 		camel_header_address_unref (a);
5000 		a = n;
5001 	}
5002 	*addrlistp = NULL;
5003 }
5004 
5005 static gchar *
maybe_quote_name(const gchar * name,gboolean * out_free_result)5006 maybe_quote_name (const gchar *name,
5007 		  gboolean *out_free_result)
5008 {
5009 	if (out_free_result)
5010 		*out_free_result = FALSE;
5011 
5012 	if (name && *name && (strchr (name, ',') || strchr (name, ';') || strchr (name, '\"') || strchr (name, '<') || strchr (name, '>'))) {
5013 		GString *quoted;
5014 
5015 		if (out_free_result)
5016 			*out_free_result = TRUE;
5017 
5018 		quoted = g_string_sized_new (strlen (name) + 2);
5019 		g_string_append_c (quoted, '\"');
5020 
5021 		while (*name) {
5022 			if (*name != '\"')
5023 				g_string_append_c (quoted, *name);
5024 			name++;
5025 		}
5026 
5027 		g_string_append_c (quoted, '\"');
5028 
5029 		return g_string_free (quoted, FALSE);
5030 	}
5031 
5032 	return (gchar *) name;
5033 }
5034 
5035 /* if encode is true, then the result is suitable for mailing, otherwise
5036  * the result is suitable for display only (and may not even be re-parsable) */
5037 static void
header_address_list_encode_append(GString * out,gint encode,CamelHeaderAddress * a)5038 header_address_list_encode_append (GString *out,
5039                                    gint encode,
5040                                    CamelHeaderAddress *a)
5041 {
5042 	while (a) {
5043 		gchar *text = NULL;
5044 		gboolean free_text = FALSE;
5045 
5046 		switch (a->type) {
5047 		case CAMEL_HEADER_ADDRESS_NAME:
5048 			if (encode)
5049 				text = camel_header_encode_phrase ((guchar *) a->name);
5050 			else
5051 				text = maybe_quote_name (a->name, &free_text);
5052 			if (text && *text)
5053 				g_string_append_printf (out, "%s <%s>", text, a->v.addr);
5054 			else
5055 				g_string_append (out, a->v.addr);
5056 			if (encode)
5057 				g_free (text);
5058 			break;
5059 		case CAMEL_HEADER_ADDRESS_GROUP:
5060 			if (encode)
5061 				text = camel_header_encode_phrase ((guchar *) a->name);
5062 			else
5063 				text = maybe_quote_name (a->name, &free_text);
5064 			g_string_append_printf (out, "%s: ", text);
5065 			header_address_list_encode_append (out, encode, a->v.members);
5066 			g_string_append_printf (out, ";");
5067 			if (encode)
5068 				g_free (text);
5069 			break;
5070 		default:
5071 			g_warning ("Invalid address type");
5072 			break;
5073 		}
5074 
5075 		a = a->next;
5076 		if (a)
5077 			g_string_append (out, ", ");
5078 
5079 		if (free_text)
5080 			g_free (text);
5081 	}
5082 }
5083 
5084 /**
5085  * camel_header_address_list_encode:
5086  * @addrlist: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
5087  *
5088  * TODO: Document me.
5089  *
5090  **/
5091 gchar *
camel_header_address_list_encode(CamelHeaderAddress * addrlist)5092 camel_header_address_list_encode (CamelHeaderAddress *addrlist)
5093 {
5094 	GString *out;
5095 
5096 	if (!addrlist)
5097 		return NULL;
5098 
5099 	out = g_string_new ("");
5100 	header_address_list_encode_append (out, TRUE, addrlist);
5101 	return g_string_free (out, FALSE);
5102 }
5103 
5104 /**
5105  * camel_header_address_list_format:
5106  * @addrlist: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
5107  *
5108  * TODO: Document me.
5109  *
5110  **/
5111 gchar *
camel_header_address_list_format(CamelHeaderAddress * addrlist)5112 camel_header_address_list_format (CamelHeaderAddress *addrlist)
5113 {
5114 	GString *out;
5115 
5116 	if (!addrlist)
5117 		return NULL;
5118 
5119 	out = g_string_new ("");
5120 
5121 	header_address_list_encode_append (out, FALSE, addrlist);
5122 
5123 	return g_string_free (out, FALSE);
5124 }
5125 
5126 gchar *
camel_header_address_fold(const gchar * in,gsize headerlen)5127 camel_header_address_fold (const gchar *in,
5128                            gsize headerlen)
5129 {
5130 	gsize len, outlen;
5131 	const gchar *inptr = in, *space, *p, *n;
5132 	GString *out;
5133 	gint i, needunfold = FALSE;
5134 
5135 	if (in == NULL)
5136 		return NULL;
5137 
5138 	/* first, check to see if we even need to fold */
5139 	len = headerlen + 2;
5140 	p = in;
5141 	while (*p) {
5142 		n = strchr (p, '\n');
5143 		if (n == NULL) {
5144 			len += strlen (p);
5145 			break;
5146 		}
5147 
5148 		needunfold = TRUE;
5149 		len += n - p;
5150 
5151 		if (len >= CAMEL_FOLD_SIZE)
5152 			break;
5153 		len = 0;
5154 		p = n + 1;
5155 	}
5156 	if (len < CAMEL_FOLD_SIZE)
5157 		return g_strdup (in);
5158 
5159 	/* we need to fold, so first unfold (if we need to), then process */
5160 	if (needunfold)
5161 		inptr = in = camel_header_unfold (in);
5162 
5163 	out = g_string_new ("");
5164 	outlen = headerlen + 2;
5165 	while (*inptr) {
5166 		space = strchr (inptr, ' ');
5167 		if (space) {
5168 			len = space - inptr + 1;
5169 		} else {
5170 			len = strlen (inptr);
5171 		}
5172 
5173 		d (printf ("next word '%.*s'\n", len, inptr));
5174 
5175 		if (outlen + len > CAMEL_FOLD_SIZE) {
5176 			d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5177 			/* strip trailing space */
5178 			if (out->len > 0 && out->str[out->len - 1] == ' ')
5179 				g_string_truncate (out, out->len - 1);
5180 			g_string_append (out, "\n\t");
5181 			outlen = 1;
5182 		}
5183 
5184 		outlen += len;
5185 		for (i = 0; i < len; i++) {
5186 			g_string_append_c (out, inptr[i]);
5187 		}
5188 
5189 		inptr += len;
5190 	}
5191 	if (needunfold)
5192 		g_free ((gchar *) in);
5193 
5194 	return g_string_free (out, FALSE);
5195 }
5196 
5197 /* simple header folding */
5198 /* will work even if the header is already folded */
5199 gchar *
camel_header_fold(const gchar * in,gsize headerlen)5200 camel_header_fold (const gchar *in,
5201                    gsize headerlen)
5202 {
5203 	gsize len, outlen, tmplen;
5204 	const gchar *inptr = in, *space, *p, *n;
5205 	GString *out;
5206 	gint needunfold = FALSE;
5207 	gchar spc;
5208 
5209 	if (in == NULL)
5210 		return NULL;
5211 
5212 	/* first, check to see if we even need to fold */
5213 	len = headerlen + 2;
5214 	p = in;
5215 	while (*p) {
5216 		n = strchr (p, '\n');
5217 		if (n == NULL) {
5218 			len += strlen (p);
5219 			break;
5220 		}
5221 
5222 		needunfold = TRUE;
5223 		len += n - p;
5224 
5225 		if (len >= CAMEL_FOLD_SIZE)
5226 			break;
5227 		len = 0;
5228 		p = n + 1;
5229 	}
5230 	if (len < CAMEL_FOLD_SIZE)
5231 		return g_strdup (in);
5232 
5233 	/* we need to fold, so first unfold (if we need to), then process */
5234 	if (needunfold)
5235 		inptr = in = camel_header_unfold (in);
5236 
5237 	out = g_string_new ("");
5238 	outlen = headerlen + 2;
5239 	while (*inptr) {
5240 		space = inptr;
5241 		while (*space && *space != ' ' && *space != '\t')
5242 			space++;
5243 
5244 		if (*space)
5245 			len = space - inptr + 1;
5246 		else
5247 			len = space - inptr;
5248 
5249 		d (printf ("next word '%.*s'\n", len, inptr));
5250 		if (outlen + len > CAMEL_FOLD_SIZE) {
5251 			d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5252 			/* strip trailing space */
5253 			if (out->len > 0 && (out->str[out->len - 1] == ' ' || out->str[out->len - 1] == '\t')) {
5254 				spc = out->str[out->len - 1];
5255 				g_string_truncate (out, out->len - 1);
5256 				g_string_append_c (out, '\n');
5257 				g_string_append_c (out, spc);
5258 				outlen = 1;
5259 			}
5260 
5261 			/* check for very long words, just cut them up */
5262 			while (outlen + len > CAMEL_FOLD_MAX_SIZE) {
5263 				tmplen = CAMEL_FOLD_MAX_SIZE - outlen;
5264 				g_string_append_len (out, inptr, tmplen);
5265 				g_string_append (out, "\n\t");
5266 				inptr += tmplen;
5267 				len -= tmplen;
5268 				outlen = 1;
5269 			}
5270 		}
5271 
5272 		g_string_append_len (out, inptr, len);
5273 		outlen += len;
5274 		inptr += len;
5275 	}
5276 	if (needunfold)
5277 		g_free ((gchar *) in);
5278 
5279 	return g_string_free (out, FALSE);
5280 }
5281 
5282 gchar *
camel_header_unfold(const gchar * in)5283 camel_header_unfold (const gchar *in)
5284 {
5285 	const gchar *inptr = in;
5286 	gchar c, *o, *out;
5287 
5288 	if (in == NULL)
5289 		return NULL;
5290 
5291 	out = g_malloc (strlen (in) + 1);
5292 
5293 	o = out;
5294 	while ((c = *inptr++)) {
5295 		if (c == '\n') {
5296 			if (camel_mime_is_lwsp (*inptr)) {
5297 				do {
5298 					inptr++;
5299 				} while (camel_mime_is_lwsp (*inptr));
5300 				*o++ = ' ';
5301 			} else {
5302 				*o++ = c;
5303 			}
5304 		} else {
5305 			*o++ = c;
5306 		}
5307 	}
5308 	*o = 0;
5309 
5310 	return out;
5311 }
5312