1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4 *
5 * This library is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library. If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors: Michael Zucchi <notzed@ximian.com>
18 * Jeffrey Stedfast <fejj@ximian.com>
19 */
20
21 #include "evolution-data-server-config.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <sys/param.h> /* for MAXHOSTNAMELEN */
28 #include <sys/stat.h>
29 #include <unistd.h>
30 #include <regex.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include <time.h>
35
36 #ifndef MAXHOSTNAMELEN
37 #define MAXHOSTNAMELEN 1024
38 #endif
39
40 #include "camel-charset-map.h"
41 #include "camel-iconv.h"
42 #include "camel-mime-utils.h"
43 #include "camel-net-utils.h"
44 #include "camel-string-utils.h"
45 #ifdef G_OS_WIN32
46 #include <winsock2.h>
47 #include <ws2tcpip.h>
48 #ifdef HAVE_WSPIAPI_H
49 #include <wspiapi.h>
50 #endif
51 #endif
52 #include "camel-utf8.h"
53
54 #ifdef G_OS_WIN32
55 #ifdef gmtime_r
56 #undef gmtime_r
57 #endif
58
59 /* The gmtime() in Microsoft's C library is MT-safe */
60 #define gmtime_r(tp,tmp) (gmtime(tp)?(*(tmp)=*gmtime(tp),(tmp)):0)
61 #endif
62
63 #if !defined HAVE_LOCALTIME_R && !defined localtime_r
64 # ifdef _LIBC
65 # define localtime_r __localtime_r
66 # else
/* Approximate localtime_r as best we can in its absence: convert with
 * localtime() and copy the result into the caller-supplied buffer. */
# define localtime_r my_localtime_r
static struct tm *localtime_r (const time_t *t, struct tm *tp);

/* Returns @tp filled in with the local broken-down time for @t, or NULL
 * when localtime() itself fails (in which case @tp is not written). */
static struct tm *
localtime_r (const time_t *t,
             struct tm *tp)
{
	struct tm *l = localtime (t);

	if (!l)
		return NULL;

	*tp = *l;

	return tp;
}
82 # endif /* !_LIBC */
83 #endif /* HAVE_LOCALTIME_R && !defined (localtime_r) */
84
85 /* for all non-essential warnings ... */
86 #define w(x)
87
88 #define d(x)
89 #define d2(x)
90
/* Register the three reference-counted header structures as boxed types;
 * each one passes its own _ref()/_unref() pair to the macro. */
G_DEFINE_BOXED_TYPE (CamelContentType,
		camel_content_type,
		camel_content_type_ref,
		camel_content_type_unref)

G_DEFINE_BOXED_TYPE (CamelContentDisposition,
		camel_content_disposition,
		camel_content_disposition_ref,
		camel_content_disposition_unref)

G_DEFINE_BOXED_TYPE (CamelHeaderAddress,
		camel_header_address,
		camel_header_address_ref,
		camel_header_address_unref)
105
/**
 * camel_mktime_utc:
 * @tm: the #tm to convert to a calendar time representation
 *
 * Converts @tm the way mktime(3) does, except that the broken-down time
 * is taken to be expressed in UTC rather than in the local timezone.
 * Note that @tm is modified: its tm_isdst member is reset to -1 before
 * it is handed to mktime().
 *
 * Returns: the calendar time representation of @tm
 *
 * Since: 3.4
 **/
time_t
camel_mktime_utc (struct tm *tm)
{
	time_t result;

	/* let mktime() work out on its own whether DST applies */
	tm->tm_isdst = -1;
	result = mktime (tm);

	/* mktime() interpreted @tm as local time; undo the zone offset */
#if defined (HAVE_TM_GMTOFF)
	result += tm->tm_gmtoff;
#elif defined (HAVE_TIMEZONE)
	if (tm->tm_isdst <= 0)
		result -= timezone;
	else
#if defined (HAVE_ALTZONE)
		result -= altzone;
#else
		result -= (timezone - 3600);
#endif
#endif

	return result;
}
139
140 /**
141 * camel_localtime_with_offset:
142 * @tt: the #time_t to convert
143 * @tm: the #tm to store the result in
144 * @offset: the #gint to store the offset in
145 *
146 * Converts the calendar time representation @tt to a broken-down
147 * time representation, stored in @tm, and provides the offset in
148 * seconds from UTC time, stored in @offset.
149 **/
150 void
camel_localtime_with_offset(time_t tt,struct tm * tm,gint * offset)151 camel_localtime_with_offset (time_t tt,
152 struct tm *tm,
153 gint *offset)
154 {
155 localtime_r (&tt, tm);
156
157 #if defined (HAVE_TM_GMTOFF)
158 *offset = tm->tm_gmtoff;
159 #elif defined (HAVE_TIMEZONE)
160 if (tm->tm_isdst > 0) {
161 #if defined (HAVE_ALTZONE)
162 *offset = -altzone;
163 #else
164 *offset = -(timezone - 3600);
165 #endif
166 } else
167 *offset = -timezone;
168 #endif
169 }
170
171 #define CAMEL_UUENCODE_CHAR(c) ((c) ? (c) + ' ' : '`')
172 #define CAMEL_UUDECODE_CHAR(c) (((c) - ' ') & 077)
173
174 static const guchar tohex[16] = {
175 '0', '1', '2', '3', '4', '5', '6', '7',
176 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
177 };
178
179 /**
180 * camel_uuencode_close:
181 * @in: (array length=len): input stream
182 * @len: input stream length
183 * @out: (inout) (array): output stream
184 * @uubuf: (inout) (array fixed-size=60): temporary buffer of 60 bytes
185 * @state: (inout): holds the number of bits that are stored in @save
186 * @save: (inout) (array length=state): leftover bits that have not yet been encoded
187 *
188 * Uuencodes a chunk of data. Call this when finished encoding data
189 * with camel_uuencode_step() to flush off the last little bit.
190 *
191 * Returns: the number of bytes encoded
192 **/
193 gsize
camel_uuencode_close(guchar * in,gsize len,guchar * out,guchar * uubuf,gint * state,guint32 * save)194 camel_uuencode_close (guchar *in,
195 gsize len,
196 guchar *out,
197 guchar *uubuf,
198 gint *state,
199 guint32 *save)
200 {
201 register guchar *outptr, *bufptr;
202 register guint32 saved;
203 gint uulen, uufill, i;
204
205 outptr = out;
206
207 if (len > 0)
208 outptr += camel_uuencode_step (in, len, out, uubuf, state, save);
209
210 uufill = 0;
211
212 saved = *save;
213 i = *state & 0xff;
214 uulen = (*state >> 8) & 0xff;
215
216 bufptr = uubuf + ((uulen / 3) * 4);
217
218 if (i > 0) {
219 while (i < 3) {
220 saved <<= 8;
221 uufill++;
222 i++;
223 }
224
225 if (i == 3) {
226 /* convert 3 normal bytes into 4 uuencoded bytes */
227 guchar b0, b1, b2;
228
229 b0 = (saved >> 16) & 0xff;
230 b1 = (saved >> 8) & 0xff;
231 b2 = saved & 0xff;
232
233 *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
234 *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
235 *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
236 *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
237
238 i = 0;
239 saved = 0;
240 uulen += 3;
241 }
242 }
243
244 if (uulen > 0) {
245 gint cplen = ((uulen / 3) * 4);
246
247 *outptr++ = CAMEL_UUENCODE_CHAR ((uulen - uufill) & 0xff);
248 memcpy (outptr, uubuf, cplen);
249 outptr += cplen;
250 *outptr++ = '\n';
251 uulen = 0;
252 }
253
254 *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
255 *outptr++ = '\n';
256
257 *save = 0;
258 *state = 0;
259
260 return outptr - out;
261 }
262
263 /**
264 * camel_uuencode_step:
265 * @in: (array length=len): input stream
266 * @len: input stream length
267 * @out: (inout) (array): output stream
268 * @uubuf: (inout) (array fixed-size=60): temporary buffer of 60 bytes
269 * @state: (inout): holds the number of bits that are stored in @save
270 * @save: (inout) (array length=state): leftover bits that have not yet been encoded
271 *
272 * Uuencodes a chunk of data. Performs an 'encode step', only encodes
273 * blocks of 45 characters to the output at a time, saves left-over
274 * state in @uubuf, @state and @save (initialize to 0 on first
275 * invocation).
276 *
277 * Returns: the number of bytes encoded
278 **/
279 gsize
camel_uuencode_step(guchar * in,gsize len,guchar * out,guchar * uubuf,gint * state,guint32 * save)280 camel_uuencode_step (guchar *in,
281 gsize len,
282 guchar *out,
283 guchar *uubuf,
284 gint *state,
285 guint32 *save)
286 {
287 register guchar *inptr, *outptr, *bufptr;
288 guchar b0, b1, b2, *inend;
289 register guint32 saved;
290 gint uulen, i;
291
292 if (len == 0)
293 return 0;
294
295 inend = in + len;
296 outptr = out;
297 inptr = in;
298
299 saved = *save;
300 i = *state & 0xff;
301 uulen = (*state >> 8) & 0xff;
302
303 if ((len + uulen) < 45) {
304 /* not enough input to write a full uuencoded line */
305 bufptr = uubuf + ((uulen / 3) * 4);
306 } else {
307 bufptr = outptr + 1;
308
309 if (uulen > 0) {
310 /* copy the previous call's tmpbuf to outbuf */
311 memcpy (bufptr, uubuf, ((uulen / 3) * 4));
312 bufptr += ((uulen / 3) * 4);
313 }
314 }
315
316 if (i == 2) {
317 b0 = (saved >> 8) & 0xff;
318 b1 = saved & 0xff;
319 saved = 0;
320 i = 0;
321
322 goto skip2;
323 } else if (i == 1) {
324 if ((inptr + 2) < inend) {
325 b0 = saved & 0xff;
326 saved = 0;
327 i = 0;
328
329 goto skip1;
330 }
331
332 while (inptr < inend) {
333 saved = (saved << 8) | *inptr++;
334 i++;
335 }
336 }
337
338 while (inptr < inend) {
339 while (uulen < 45 && (inptr + 3) <= inend) {
340 b0 = *inptr++;
341 skip1:
342 b1 = *inptr++;
343 skip2:
344 b2 = *inptr++;
345
346 /* convert 3 normal bytes into 4 uuencoded bytes */
347 *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
348 *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
349 *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
350 *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
351
352 uulen += 3;
353 }
354
355 if (uulen >= 45) {
356 *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
357 outptr += ((45 / 3) * 4) + 1;
358
359 *outptr++ = '\n';
360 uulen = 0;
361
362 if ((inptr + 45) <= inend) {
363 /* we have enough input to output another full line */
364 bufptr = outptr + 1;
365 } else {
366 bufptr = uubuf;
367 }
368 } else {
369 /* not enough input to continue... */
370 for (i = 0, saved = 0; inptr < inend; i++)
371 saved = (saved << 8) | *inptr++;
372 }
373 }
374
375 *save = saved;
376 *state = ((uulen & 0xff) << 8) | (i & 0xff);
377
378 return outptr - out;
379 }
380
381 /**
382 * camel_uudecode_step:
383 * @in: (array length=inlen): input stream
384 * @inlen: max length of data to decode
385 * @out: (inout) (array): output stream
386 * @state: (inout): holds the number of bits that are stored in @save
387 * @save: (inout) (array length=state): leftover bits that have not yet been decoded
388 *
389 * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
390 * uuencoded data. Assumes the "begin mode filename" line has
391 * been stripped off.
392 *
393 * Returns: the number of bytes decoded
394 **/
395 gsize
camel_uudecode_step(guchar * in,gsize len,guchar * out,gint * state,guint32 * save)396 camel_uudecode_step (guchar *in,
397 gsize len,
398 guchar *out,
399 gint *state,
400 guint32 *save)
401 {
402 register guchar *inptr, *outptr;
403 guchar *inend, ch;
404 register guint32 saved;
405 gboolean last_was_eoln;
406 gint uulen, i;
407
408 if (*state & CAMEL_UUDECODE_STATE_END)
409 return 0;
410
411 saved = *save;
412 i = *state & 0xff;
413 uulen = (*state >> 8) & 0xff;
414 if (uulen == 0)
415 last_was_eoln = TRUE;
416 else
417 last_was_eoln = FALSE;
418
419 inend = in + len;
420 outptr = out;
421 inptr = in;
422
423 while (inptr < inend) {
424 if (*inptr == '\n') {
425 last_was_eoln = TRUE;
426
427 inptr++;
428 continue;
429 } else if (!uulen || last_was_eoln) {
430 /* first octet on a line is the uulen octet */
431 uulen = CAMEL_UUDECODE_CHAR (*inptr);
432 last_was_eoln = FALSE;
433 if (uulen == 0) {
434 *state |= CAMEL_UUDECODE_STATE_END;
435 break;
436 }
437
438 inptr++;
439 continue;
440 }
441
442 ch = *inptr++;
443
444 if (uulen > 0) {
445 /* save the byte */
446 saved = (saved << 8) | ch;
447 i++;
448 if (i == 4) {
449 /* convert 4 uuencoded bytes to 3 normal bytes */
450 guchar b0, b1, b2, b3;
451
452 b0 = saved >> 24;
453 b1 = saved >> 16 & 0xff;
454 b2 = saved >> 8 & 0xff;
455 b3 = saved & 0xff;
456
457 if (uulen >= 3) {
458 *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
459 *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
460 *outptr++ = CAMEL_UUDECODE_CHAR (b2) << 6 | CAMEL_UUDECODE_CHAR (b3);
461 uulen -= 3;
462 } else {
463 gint orig_uulen = uulen;
464
465 if (orig_uulen >= 1) {
466 *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
467 uulen--;
468 }
469
470 if (orig_uulen >= 2) {
471 *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
472 uulen--;
473 }
474 }
475
476 i = 0;
477 saved = 0;
478 }
479 } else {
480 break;
481 }
482 }
483
484 *save = saved;
485 *state = (*state & CAMEL_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
486
487 return outptr - out;
488 }
489
490 /**
491 * camel_quoted_encode_close:
492 * @in: (array length=len): input stream
493 * @len: length of the input
494 * @out: (inout) (array): output string
495 * @state: (inout): holds the number of bits that are stored in @save
496 * @save: (inout) (array length=state): leftover bits that have not yet been encoded
497 *
498 * Quoted-printable encodes a block of text. Call this when finished
499 * encoding data with camel_quoted_encode_step() to flush off
500 * the last little bit.
501 *
502 * Returns: the number of bytes encoded
503 **/
504 gsize
camel_quoted_encode_close(guchar * in,gsize len,guchar * out,gint * state,gint * save)505 camel_quoted_encode_close (guchar *in,
506 gsize len,
507 guchar *out,
508 gint *state,
509 gint *save)
510 {
511 register guchar *outptr = out;
512 gint last;
513
514 if (len > 0)
515 outptr += camel_quoted_encode_step (in, len, outptr, state, save);
516
517 last = *state;
518 if (last != -1) {
519 /* space/tab must be encoded if it's the last character on
520 * the line */
521 if (camel_mime_is_qpsafe (last) && last != ' ' && last != 9) {
522 *outptr++ = last;
523 } else {
524 *outptr++ = '=';
525 *outptr++ = tohex[(last>>4) & 0xf];
526 *outptr++ = tohex[last & 0xf];
527 }
528 }
529
530 *save = 0;
531 *state = -1;
532
533 return outptr - out;
534 }
535
536 /**
537 * camel_quoted_encode_step:
538 * @in: (array length=len): input stream
539 * @len: length of the input
540 * @out: (inout) (array): output string
541 * @state: (inout): holds the number of bits that are stored in @save
542 * @save: (inout) (array length=state): leftover bits that have not yet been encoded
543 *
544 * Quoted-printable encodes a block of text. Performs an 'encode
545 * step', saves left-over state in state and save (initialise to -1 on
546 * first invocation).
547 *
548 * Returns: the number of bytes encoded
549 **/
550 gsize
camel_quoted_encode_step(guchar * in,gsize len,guchar * out,gint * statep,gint * save)551 camel_quoted_encode_step (guchar *in,
552 gsize len,
553 guchar *out,
554 gint *statep,
555 gint *save)
556 {
557 register guchar *inptr, *outptr, *inend;
558 guchar c;
559 register gint sofar = *save; /* keeps track of how many chars on a line */
560 register gint last = *statep; /* keeps track if last gchar to end was a space cr etc */
561
562 #define output_last() \
563 if (sofar + 3 > 74) { \
564 *outptr++ = '='; \
565 *outptr++ = '\n'; \
566 sofar = 0; \
567 } \
568 *outptr++ = '='; \
569 *outptr++ = tohex[(last >> 4) & 0xf]; \
570 *outptr++ = tohex[last & 0xf]; \
571 sofar += 3;
572
573 inptr = in;
574 inend = in + len;
575 outptr = out;
576 while (inptr < inend) {
577 c = *inptr++;
578 if (c == '\r') {
579 if (last != -1) {
580 output_last ();
581 }
582 last = c;
583 } else if (c == '\n') {
584 if (last != -1 && last != '\r') {
585 output_last ();
586 }
587 *outptr++ = '\n';
588 sofar = 0;
589 last = -1;
590 } else {
591 if (last != -1) {
592 if (camel_mime_is_qpsafe (last)) {
593 *outptr++ = last;
594 sofar++;
595 } else {
596 output_last ();
597 }
598 }
599
600 if (camel_mime_is_qpsafe (c)) {
601 if (sofar > 74) {
602 *outptr++ = '=';
603 *outptr++ = '\n';
604 sofar = 0;
605 }
606
607 /* delay output of space gchar */
608 if (c == ' ' || c == '\t') {
609 last = c;
610 } else {
611 *outptr++ = c;
612 sofar++;
613 last = -1;
614 }
615 } else {
616 if (sofar > 72) {
617 *outptr++ = '=';
618 *outptr++ = '\n';
619 sofar = 3;
620 } else
621 sofar += 3;
622
623 *outptr++ = '=';
624 *outptr++ = tohex[(c >> 4) & 0xf];
625 *outptr++ = tohex[c & 0xf];
626 last = -1;
627 }
628 }
629 }
630 *save = sofar;
631 *statep = last;
632
633 #undef output_last
634
635 return (outptr - out);
636 }
637
638 /*
639 * FIXME: this does not strip trailing spaces from lines (as it should, rfc 2045, section 6.7)
640 * Should it also canonicalise the end of line to CR LF??
641 *
642 * Note: Trailing rubbish (at the end of input), like = or =x or =\r will be lost.
643 */
644
645 /**
646 * camel_quoted_decode_step:
647 * @in: (array length=len): input stream
648 * @len: max length of data to decode
649 * @out: (inout) (array): output stream
650 * @savestate: (inout): holds the number of bits that are stored in @saveme
651 * @saveme: (inout) (array length=savestate): leftover bits that have not yet been decoded
652 *
653 * Decodes a block of quoted-printable encoded data. Performs a
654 * 'decode step' on a chunk of QP encoded data.
655 *
656 * Returns: the number of bytes decoded
657 **/
658 gsize
camel_quoted_decode_step(guchar * in,gsize len,guchar * out,gint * savestate,gint * saveme)659 camel_quoted_decode_step (guchar *in,
660 gsize len,
661 guchar *out,
662 gint *savestate,
663 gint *saveme)
664 {
665 register guchar *inptr, *outptr;
666 guchar *inend, c;
667 gint state, save;
668
669 inend = in + len;
670 outptr = out;
671
672 d (printf ("quoted-printable, decoding text '%.*s'\n", len, in));
673
674 state = *savestate;
675 save = *saveme;
676 inptr = in;
677 while (inptr < inend) {
678 switch (state) {
679 case 0:
680 while (inptr < inend) {
681 c = *inptr++;
682 if (c == '=') {
683 state = 1;
684 break;
685 }
686 #ifdef CANONICALISE_EOL
687 /*else if (c=='\r') {
688 state = 3;
689 } else if (c == '\n') {
690 *outptr++ = '\r';
691 *outptr++ = c;
692 } */
693 #endif
694 else {
695 *outptr++ = c;
696 }
697 }
698 break;
699 case 1:
700 c = *inptr++;
701 if (c == '\n') {
702 /* soft break ... unix end of line */
703 state = 0;
704 } else {
705 save = c;
706 state = 2;
707 }
708 break;
709 case 2:
710 c = *inptr++;
711 if (isxdigit (c) && isxdigit (save)) {
712 c = toupper (c);
713 save = toupper (save);
714 *outptr++ = (((save>='A'?save-'A'+10:save-'0')&0x0f) << 4)
715 | ((c >= 'A' ? c - 'A' + 10 : c - '0') &0x0f);
716 } else if (c == '\n' && save == '\r') {
717 /* soft break ... canonical end of line */
718 } else {
719 /* just output the data */
720 *outptr++ = '=';
721 *outptr++ = save;
722 *outptr++ = c;
723 }
724 state = 0;
725 break;
726 #ifdef CANONICALISE_EOL
727 case 3:
728 /* convert \r -> to \r\n, leaves \r\n alone */
729 c = *inptr++;
730 if (c == '\n') {
731 *outptr++ = '\r';
732 *outptr++ = c;
733 } else {
734 *outptr++ = '\r';
735 *outptr++ = '\n';
736 *outptr++ = c;
737 }
738 state = 0;
739 break;
740 #endif
741 }
742 }
743
744 *savestate = state;
745 *saveme = save;
746
747 return outptr - out;
748 }
749
750 /*
751 * this is for the "Q" encoding of international words,
752 * which is slightly different than plain quoted-printable (mainly by allowing 0x20 <> _)
753 */
754 static gsize
quoted_decode(const guchar * in,gsize len,guchar * out)755 quoted_decode (const guchar *in,
756 gsize len,
757 guchar *out)
758 {
759 register const guchar *inptr;
760 register guchar *outptr;
761 const guchar *inend;
762 guchar c, c1;
763 gint ret = 0;
764
765 inend = in + len;
766 outptr = out;
767
768 d (printf ("decoding text '%.*s'\n", len, in));
769
770 inptr = in;
771 while (inptr < inend) {
772 c = *inptr++;
773 if (c == '=') {
774 /* silently ignore truncated data? */
775 if (inend - in >= 2) {
776 c = toupper (*inptr++);
777 c1 = toupper (*inptr++);
778 *outptr++ = (((c>='A'?c-'A'+10:c-'0')&0x0f) << 4)
779 | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') &0x0f);
780 } else {
781 ret = -1;
782 break;
783 }
784 } else if (c == '_') {
785 *outptr++ = 0x20;
786 } else {
787 *outptr++ = c;
788 }
789 }
790 if (ret == 0) {
791 return outptr - out;
792 }
793 return 0;
794 }
795
796 /* rfc2047 version of quoted-printable */
797 /* safemask is the mask to apply to the camel_mime_special_table to determine what
798 * characters can safely be included without encoding */
799 static gsize
quoted_encode(const guchar * in,gsize len,guchar * out,gushort safemask)800 quoted_encode (const guchar *in,
801 gsize len,
802 guchar *out,
803 gushort safemask)
804 {
805 register const guchar *inptr, *inend;
806 guchar *outptr;
807 guchar c;
808
809 inptr = in;
810 inend = in + len;
811 outptr = out;
812 while (inptr < inend) {
813 c = *inptr++;
814 if (c == ' ') {
815 *outptr++ = '_';
816 } else if (camel_mime_special_table[c] & safemask) {
817 *outptr++ = c;
818 } else {
819 *outptr++ = '=';
820 *outptr++ = tohex[(c >> 4) & 0xf];
821 *outptr++ = tohex[c & 0xf];
822 }
823 }
824
825 d (printf ("encoding '%.*s' = '%.*s'\n", len, in, outptr - out, out));
826
827 return (outptr - out);
828 }
829
830 static void
header_decode_lwsp(const gchar ** in)831 header_decode_lwsp (const gchar **in)
832 {
833 const gchar *inptr = *in;
834 gchar c;
835
836 d2 (printf ("is ws: '%s'\n", *in));
837
838 while ((camel_mime_is_lwsp (*inptr) || *inptr =='(') && *inptr != '\0') {
839 while (camel_mime_is_lwsp (*inptr) && *inptr != '\0') {
840 d2 (printf ("(%c)", *inptr));
841 inptr++;
842 }
843 d2 (printf ("\n"));
844
845 /* check for comments */
846 if (*inptr == '(') {
847 gint depth = 1;
848 inptr++;
849 while (depth && (c=*inptr) && *inptr != '\0') {
850 if (c == '\\' && inptr[1]) {
851 inptr++;
852 } else if (c == '(') {
853 depth++;
854 } else if (c == ')') {
855 depth--;
856 }
857 inptr++;
858 }
859 }
860 }
861 *in = inptr;
862 }
863
864 static gchar *
camel_iconv_strndup(GIConv cd,const gchar * string,gsize n)865 camel_iconv_strndup (GIConv cd,
866 const gchar *string,
867 gsize n)
868 {
869 gsize inleft, outleft, converted = 0;
870 gchar *out, *outbuf;
871 const gchar *inbuf;
872 gsize outlen;
873 gint errnosav;
874
875 if (cd == (GIConv) -1)
876 return g_strndup (string, n);
877
878 outlen = n * 2 + 16;
879 out = g_malloc (outlen + 4);
880
881 inbuf = string;
882 inleft = n;
883
884 do {
885 errno = 0;
886 outbuf = out + converted;
887 outleft = outlen - converted;
888
889 converted = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
890 if (converted == (gsize) -1) {
891 if (errno != E2BIG && errno != EINVAL)
892 goto fail;
893 }
894
895 /*
896 * E2BIG There is not sufficient room at *outbuf.
897 *
898 * We just need to grow our outbuffer and try again.
899 */
900
901 converted = outbuf - out;
902 if (errno == E2BIG) {
903 outlen += inleft * 2 + 16;
904 out = g_realloc (out, outlen + 4);
905 outbuf = out + converted;
906 }
907 } while (errno == E2BIG && inleft > 0);
908
909 /*
910 * EINVAL An incomplete multibyte sequence has been encoun
911 * tered in the input.
912 *
913 * We'll just have to ignore it...
914 */
915
916 /* flush the iconv conversion */
917 while (g_iconv (cd, NULL, NULL, &outbuf, &outleft) == (gsize) -1) {
918 if (errno != E2BIG)
919 break;
920
921 outlen += 16;
922 converted = outbuf - out;
923 out = g_realloc (out, outlen + 4);
924 outleft = outlen - converted;
925 outbuf = out + converted;
926 }
927
928 /* Note: not all charsets can be nul-terminated with a single
929 * nul byte. UCS2, for example, needs 2 nul bytes and UCS4
930 * needs 4. I hope that 4 nul bytes is enough to terminate all
931 * multibyte charsets? */
932
933 /* nul-terminate the string */
934 memset (outbuf, 0, 4);
935
936 /* reset the cd */
937 g_iconv (cd, NULL, NULL, NULL, NULL);
938
939 return out;
940
941 fail:
942
943 errnosav = errno;
944
945 w (g_warning ("camel_iconv_strndup: %s at byte %lu", g_strerror (errno), n - inleft));
946
947 g_free (out);
948
949 /* reset the cd */
950 g_iconv (cd, NULL, NULL, NULL, NULL);
951
952 errno = errnosav;
953
954 return NULL;
955 }
956
957 #define is_ascii(c) isascii ((gint) ((guchar) (c)))
958
959 static gchar *
decode_8bit(const gchar * text,gsize len,const gchar * default_charset)960 decode_8bit (const gchar *text,
961 gsize len,
962 const gchar *default_charset)
963 {
964 const gchar *charsets[4] = { "UTF-8", NULL, NULL, NULL };
965 gsize inleft, outleft, outlen, rc, min, n;
966 const gchar *locale_charset, *best;
967 gchar *out, *outbuf;
968 const gchar *inbuf;
969 GIConv cd;
970 gint i = 1;
971
972 if (default_charset && g_ascii_strcasecmp (default_charset, "UTF-8") != 0)
973 charsets[i++] = default_charset;
974
975 locale_charset = camel_iconv_locale_charset ();
976 if (locale_charset && g_ascii_strcasecmp (locale_charset, "UTF-8") != 0)
977 charsets[i++] = locale_charset;
978
979 min = len;
980 best = charsets[0];
981
982 outlen = (len * 2) + 16;
983 out = g_malloc (outlen + 1);
984
985 for (i = 0; charsets[i]; i++) {
986 if ((cd = camel_iconv_open ("UTF-8", charsets[i])) == (GIConv) -1)
987 continue;
988
989 outleft = outlen;
990 outbuf = out;
991 inleft = len;
992 inbuf = text;
993 n = 0;
994
995 do {
996 rc = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
997 if (rc == (gsize) -1) {
998 if (errno == EINVAL) {
999 /* incomplete sequence at the end of the input buffer */
1000 n += inleft;
1001 break;
1002 }
1003
1004 if (errno == E2BIG) {
1005 outlen += (inleft * 2) + 16;
1006 rc = (gsize) (outbuf - out);
1007 out = g_realloc (out, outlen + 1);
1008 outleft = outlen - rc;
1009 outbuf = out + rc;
1010 } else {
1011 inleft--;
1012 inbuf++;
1013 n++;
1014 }
1015 }
1016 } while (inleft > 0);
1017
1018 while ((rc = g_iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1019 if (errno != E2BIG)
1020 break;
1021
1022 outlen += 16;
1023 rc = (gsize) (outbuf - out);
1024 out = g_realloc (out, outlen + 1);
1025 outleft = outlen - rc;
1026 outbuf = out + rc;
1027 }
1028
1029 *outbuf = '\0';
1030
1031 camel_iconv_close (cd);
1032
1033 if (rc != (gsize) -1 && n == 0)
1034 return out;
1035
1036 if (n < min) {
1037 best = charsets[i];
1038 min = n;
1039 }
1040 }
1041
1042 /* if we get here, then none of the charsets fit the 8bit text flawlessly...
1043 * try to find the one that fit the best and use that to convert what we can,
1044 * replacing any byte we can't convert with a '?' */
1045
1046 if ((cd = camel_iconv_open ("UTF-8", best)) == (GIConv) -1) {
1047 /* this shouldn't happen... but if we are here, then
1048 * it did... the only thing we can do at this point
1049 * is replace the 8bit garbage and pray */
1050 register const gchar *inptr = text;
1051 const gchar *inend = inptr + len;
1052
1053 outbuf = out;
1054
1055 while (inptr < inend) {
1056 if (is_ascii (*inptr))
1057 *outbuf++ = *inptr++;
1058 else
1059 *outbuf++ = '?';
1060 }
1061
1062 *outbuf = '\0';
1063
1064 return out;
1065 }
1066
1067 outleft = outlen;
1068 outbuf = out;
1069 inleft = len;
1070 inbuf = text;
1071
1072 do {
1073 rc = g_iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
1074 if (rc == (gsize) -1) {
1075 if (errno == EINVAL) {
1076 /* incomplete sequence at the end of the input buffer */
1077 break;
1078 }
1079
1080 if (errno == E2BIG) {
1081 rc = outbuf - out;
1082 outlen += inleft * 2 + 16;
1083 out = g_realloc (out, outlen + 1);
1084 outleft = outlen - rc;
1085 outbuf = out + rc;
1086 } else {
1087 *outbuf++ = '?';
1088 outleft--;
1089 inleft--;
1090 inbuf++;
1091 }
1092 }
1093 } while (inleft > 0);
1094
1095 while ((rc = g_iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1096 if (errno != E2BIG)
1097 break;
1098
1099 outlen += 16;
1100 rc = (gsize) (outbuf - out);
1101 out = g_realloc (out, outlen + 1);
1102 outleft = outlen - rc;
1103 outbuf = out + rc;
1104 }
1105
1106 *outbuf = '\0';
1107
1108 camel_iconv_close (cd);
1109
1110 return out;
1111 }
1112
1113 #define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
1114
1115 static void
make_string_utf8_valid(gchar * text,gsize textlen)1116 make_string_utf8_valid (gchar *text,
1117 gsize textlen)
1118 {
1119 gchar *p;
1120 gsize len;
1121
1122 p = text;
1123 len = textlen;
1124
1125 while (!g_utf8_validate (p, len, (const gchar **) &p)) {
1126 len = textlen - (p - text);
1127 *p = '?';
1128 }
1129 }
1130
1131 static void
sanitize_decoded_text(guchar * text,gssize * inout_textlen)1132 sanitize_decoded_text (guchar *text,
1133 gssize *inout_textlen)
1134 {
1135 gssize ii, jj, textlen;
1136
1137 g_return_if_fail (text != NULL);
1138 g_return_if_fail (inout_textlen != NULL);
1139
1140 textlen = *inout_textlen;
1141
1142 for (ii = 0, jj = 0; ii < textlen; ii++) {
1143 /* Skip '\0' and '\r' characters */
1144 if (text[ii] == 0 || text[ii] == '\r')
1145 continue;
1146
1147 /* Change '\n' into space */
1148 if (text[ii] == '\n')
1149 text[ii] = ' ';
1150
1151 if (ii != jj)
1152 text[jj] = text[ii];
1153
1154 jj++;
1155 }
1156
1157 *inout_textlen = jj;
1158 }
1159
1160 /* decode an rfc2047 encoded-word token */
1161 static gchar *
rfc2047_decode_word(const gchar * in,gsize inlen,const gchar * default_charset)1162 rfc2047_decode_word (const gchar *in,
1163 gsize inlen,
1164 const gchar *default_charset)
1165 {
1166 const guchar *instart = (const guchar *) in;
1167 const guchar *inptr = instart + 2;
1168 const guchar *inend = instart + inlen - 2;
1169 guchar *decoded;
1170 const gchar *charset;
1171 gchar *charenc, *p;
1172 guint32 save = 0;
1173 gssize declen;
1174 gint state = 0;
1175 gsize len;
1176 GIConv cd;
1177 gchar *buf;
1178
1179 /* skip over the charset */
1180 if (inlen < 8 || !(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
1181 return NULL;
1182
1183 inptr++;
1184
1185 switch (*inptr) {
1186 case 'B':
1187 case 'b':
1188 inptr += 2;
1189 decoded = g_alloca (((inend - inptr) * 3 / 4) + 3);
1190 declen = g_base64_decode_step ((gchar *) inptr, inend - inptr, decoded, &state, &save);
1191 break;
1192 case 'Q':
1193 case 'q':
1194 inptr += 2;
1195 decoded = g_alloca (inend - inptr);
1196 declen = quoted_decode (inptr, inend - inptr, decoded);
1197
1198 if (declen == -1) {
1199 d (fprintf (stderr, "encountered broken 'Q' encoding\n"));
1200 return NULL;
1201 }
1202 break;
1203 default:
1204 d (fprintf (stderr, "unknown encoding\n"));
1205 return NULL;
1206 }
1207
1208 sanitize_decoded_text (decoded, &declen);
1209
1210 /* never return empty string, return rather NULL */
1211 if (!declen)
1212 return NULL;
1213
1214 len = (inptr - 3) - (instart + 2);
1215 charenc = g_alloca (len + 1);
1216 memcpy (charenc, in + 2, len);
1217 charenc[len] = '\0';
1218 charset = charenc;
1219
1220 /* rfc2231 updates rfc2047 encoded words...
1221 * The ABNF given in RFC 2047 for encoded-words is:
1222 * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1223 * This specification changes this ABNF to:
1224 * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1225 */
1226
1227 /* trim off the 'language' part if it's there... */
1228 if ((p = strchr (charset, '*')))
1229 *p = '\0';
1230
1231 /* slight optimization? */
1232 if (!g_ascii_strcasecmp (charset, "UTF-8"))
1233 return g_strndup ((gchar *) decoded, declen);
1234
1235 if (charset[0])
1236 charset = camel_iconv_charset_name (charset);
1237
1238 if (!charset[0] || (cd = camel_iconv_open ("UTF-8", charset)) == (GIConv) -1) {
1239 w (g_warning (
1240 "Cannot convert from %s to UTF-8, "
1241 "header display may be corrupt: %s",
1242 charset[0] ? charset : "unspecified charset",
1243 g_strerror (errno)));
1244
1245 return decode_8bit ((gchar *) decoded, declen, default_charset);
1246 }
1247
1248 buf = camel_iconv_strndup (cd, (gchar *) decoded, declen);
1249 camel_iconv_close (cd);
1250
1251 if (buf != NULL)
1252 return buf;
1253
1254 w (g_warning (
1255 "Failed to convert \"%.*s\" to UTF-8, display may be "
1256 "corrupt: %s", declen, decoded, g_strerror (errno)));
1257
1258 return decode_8bit ((gchar *) decoded, declen, charset);
1259 }
1260
1261 /* ok, a lot of mailers are BROKEN, and send iso-latin1 encoded
1262 * headers, when they should just be sticking to US-ASCII
1263 * according to the rfc's. Anyway, since the conversion to utf-8
1264 * is trivial, just do it here without iconv */
1265 static GString *
append_latin1(GString * out,const gchar * in,gsize len)1266 append_latin1 (GString *out,
1267 const gchar *in,
1268 gsize len)
1269 {
1270 guint c;
1271
1272 while (len) {
1273 c = (guint) * in++;
1274 len--;
1275 if (c & 0x80) {
1276 g_string_append_c (out, 0xc0 | ((c >> 6) & 0x3)); /* 110000xx */
1277 g_string_append_c (out, 0x80 | (c & 0x3f)); /* 10xxxxxx */
1278 } else {
1279 g_string_append_c (out, c);
1280 }
1281 }
1282 return out;
1283 }
1284
1285 static gint
append_8bit(GString * out,const gchar * inbuf,gsize inlen,const gchar * charset)1286 append_8bit (GString *out,
1287 const gchar *inbuf,
1288 gsize inlen,
1289 const gchar *charset)
1290 {
1291 gchar *outbase, *outbuf;
1292 gsize outlen;
1293 GIConv ic;
1294
1295 ic = camel_iconv_open ("UTF-8", charset);
1296 if (ic == (GIConv) -1)
1297 return FALSE;
1298
1299 outlen = inlen * 6 + 16;
1300 outbuf = outbase = g_malloc (outlen);
1301
1302 if (camel_iconv (ic, &inbuf, &inlen, &outbuf, &outlen) == (gsize) -1) {
1303 w (g_warning ("Conversion to '%s' failed: %s", charset, g_strerror (errno)));
1304 g_free (outbase);
1305 camel_iconv_close (ic);
1306 return FALSE;
1307 }
1308
1309 camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
1310
1311 *outbuf = 0;
1312 g_string_append (out, outbase);
1313 g_free (outbase);
1314 camel_iconv_close (ic);
1315
1316 return TRUE;
1317
1318 }
1319
1320 static GString *
append_quoted_pair(GString * str,const gchar * in,gsize inlen)1321 append_quoted_pair (GString *str,
1322 const gchar *in,
1323 gsize inlen)
1324 {
1325 register const gchar *inptr = in;
1326 const gchar *inend = in + inlen;
1327 gchar c;
1328
1329 while (inptr < inend) {
1330 c = *inptr++;
1331 if (c == '\\' && inptr < inend)
1332 g_string_append_c (str, *inptr++);
1333 else
1334 g_string_append_c (str, c);
1335 }
1336
1337 return str;
1338 }
1339
/* Decodes a simple text, rfc822 + rfc2047.
 *
 * @in is scanned as a sequence of "leading whitespace + word" pairs.  Each
 * word is either something that looks like an rfc2047 encoded-word
 * ("=?charset?B|Q?text?=") or a plain run of non-whitespace characters.
 *
 *  - Encoded-words that decode successfully are appended decoded; the
 *    whitespace between two consecutive decoded encoded-words is dropped.
 *  - Encoded-words that fail to decode are appended verbatim, together
 *    with their leading whitespace.
 *  - Plain words containing non-ASCII bytes go through decode_8bit() with
 *    @default_charset; when @ctext is non-zero plain words additionally
 *    have their quoted-pairs resolved (see append_quoted_pair()).
 *
 * Returns a newly allocated string; an empty string when @in is NULL. */
static gchar *
header_decode_text (const gchar *in,
                    gint ctext,
                    const gchar *default_charset)
{
	register const gchar *inptr = in;
	/* TRUE while the previously emitted word was a decoded encoded-word */
	gboolean encoded = FALSE;
	const gchar *lwsp, *text;
	gsize nlwsp, n;
	gboolean ascii;
	gchar *decoded;
	GString *out;

	if (in == NULL)
		return g_strdup ("");

	out = g_string_sized_new (strlen (in) + 1);

	while (*inptr != '\0') {
		/* remember the whitespace run in front of the next word */
		lwsp = inptr;
		while (camel_mime_is_lwsp (*inptr))
			inptr++;

		nlwsp = (gsize) (inptr - lwsp);

		if (*inptr != '\0') {
			text = inptr;
			/* cleared as soon as a non-ASCII byte is seen in the word */
			ascii = TRUE;

			if (!strncmp (inptr, "=?", 2)) {
				inptr += 2;

				/* skip past the charset (if one is even declared, sigh) */
				while (*inptr && *inptr != '?') {
					ascii = ascii && is_ascii (*inptr);
					inptr++;
				}

				/* sanity check encoding type; strchr() also matches the
				 * terminating NUL, hence the explicit !inptr[1] test */
				if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
					goto non_rfc2047;

				inptr += 3;

				/* find the end of the rfc2047 encoded word token */
				while (*inptr && strncmp (inptr, "?=", 2) != 0) {
					ascii = ascii && is_ascii (*inptr);
					inptr++;
				}

				if (!strncmp (inptr, "?=", 2))
					inptr += 2;
			} else {
			non_rfc2047:
				/* also entered via goto with inptr already advanced past
				 * the "=?" and the would-be charset */

				/* stop if we encounter a possible rfc2047 encoded
				 * token even if it's inside another word, sigh. */
				while (*inptr && !camel_mime_is_lwsp (*inptr) &&
				       strncmp (inptr, "=?", 2) != 0) {
					ascii = ascii && is_ascii (*inptr);
					inptr++;
				}
			}

			n = (gsize) (inptr - text);
			if (is_rfc2047_encoded_word (text, n)) {
				if ((decoded = rfc2047_decode_word (text, n, default_charset))) {
					/* rfc2047 states that you must ignore all
					 * whitespace between encoded words */
					if (!encoded)
						g_string_append_len (out, lwsp, nlwsp);

					g_string_append (out, decoded);
					g_free (decoded);

					encoded = TRUE;
				} else {
					/* append lwsp and invalid rfc2047 encoded-word token;
					 * lwsp and text are contiguous, so one call copies both */
					g_string_append_len (out, lwsp, nlwsp + n);
					encoded = FALSE;
				}
			} else {
				/* append lwsp */
				g_string_append_len (out, lwsp, nlwsp);

				/* append word token */
				if (!ascii) {
					/* *sigh* I hate broken mailers... */
					decoded = decode_8bit (text, n, default_charset);
					n = strlen (decoded);
					text = decoded;
				} else {
					decoded = NULL;
				}

				if (!ctext)
					g_string_append_len (out, text, n);
				else
					append_quoted_pair (out, text, n);

				/* NULL when the word was plain ASCII */
				g_free (decoded);

				encoded = FALSE;
			}
		} else {
			/* appending trailing lwsp */
			g_string_append_len (out, lwsp, nlwsp);
			break;
		}
	}

	return g_string_free (out, FALSE);
}
1453
1454 /**
1455 * camel_header_decode_string:
1456 * @in: input header value string
1457 * @default_charset: default charset to use if improperly encoded
1458 *
1459 * Decodes rfc2047 encoded-word tokens
1460 *
1461 * Returns: a string containing the UTF-8 version of the decoded header
1462 * value
1463 **/
1464 gchar *
camel_header_decode_string(const gchar * in,const gchar * default_charset)1465 camel_header_decode_string (const gchar *in,
1466 const gchar *default_charset)
1467 {
1468 gchar *res;
1469
1470 if (in == NULL)
1471 return NULL;
1472
1473 res = header_decode_text (in, FALSE, default_charset);
1474
1475 if (res)
1476 make_string_utf8_valid (res, strlen (res));
1477
1478 return res;
1479 }
1480
1481 /**
1482 * camel_header_format_ctext:
1483 * @in: input header value string
1484 * @default_charset: default charset to use if improperly encoded
1485 *
1486 * Decodes a header which contains rfc2047 encoded-word tokens that
1487 * may or may not be within a comment.
1488 *
1489 * Returns: a string containing the UTF-8 version of the decoded header
1490 * value
1491 **/
1492 gchar *
camel_header_format_ctext(const gchar * in,const gchar * default_charset)1493 camel_header_format_ctext (const gchar *in,
1494 const gchar *default_charset)
1495 {
1496 if (in == NULL)
1497 return NULL;
1498
1499 return header_decode_text (in, TRUE, default_charset);
1500 }
1501
1502 /* how long a sequence of pre-encoded words should be less than, to attempt to
1503 * fit into a properly folded word. Only a guide. */
1504 #define CAMEL_FOLD_PREENCODED (24)
1505
1506 /* FIXME: needs a way to cache iconv opens for different charsets? */
1507 static void
rfc2047_encode_word(GString * outstring,const gchar * in,gsize len,const gchar * type,gushort safemask)1508 rfc2047_encode_word (GString *outstring,
1509 const gchar *in,
1510 gsize len,
1511 const gchar *type,
1512 gushort safemask)
1513 {
1514 GIConv ic = (GIConv) -1;
1515 gchar *buffer, *out, *ascii;
1516 gsize inlen, outlen, enclen, bufflen;
1517 const gchar *inptr, *p;
1518 gint first = 1;
1519
1520 d (printf ("Converting [%d] '%.*s' to %s\n", len, len, in, type));
1521
1522 /* convert utf8->encoding */
1523 bufflen = len * 6 + 16;
1524 buffer = g_alloca (bufflen);
1525 inlen = len;
1526 inptr = in;
1527
1528 ascii = g_alloca (bufflen);
1529
1530 if (g_ascii_strcasecmp (type, "UTF-8") != 0)
1531 ic = camel_iconv_open (type, "UTF-8");
1532
1533 while (inlen) {
1534 gssize convlen, proclen;
1535 gint i;
1536
1537 /* break up words into smaller bits, what we really want is encoded + overhead < 75,
1538 * but we'll just guess what that means in terms of input chars, and assume its good enough */
1539
1540 out = buffer;
1541 outlen = bufflen;
1542
1543 if (ic == (GIConv) -1) {
1544 /* native encoding case, the easy one (?) */
1545 /* we work out how much we can convert, and still be in length */
1546 /* proclen will be the result of input characters that we can convert, to the nearest
1547 * (approximated) valid utf8 gchar */
1548 convlen = 0;
1549 proclen = -1;
1550 p = inptr;
1551 i = 0;
1552 while (p < (in + len) && convlen < (75 - strlen ("=?utf-8?q?\?="))) {
1553 guchar c = *p++;
1554
1555 if (c >= 0xc0)
1556 proclen = i;
1557 i++;
1558 if (c < 0x80)
1559 proclen = i;
1560 if (camel_mime_special_table[c] & safemask)
1561 convlen += 1;
1562 else
1563 convlen += 3;
1564 }
1565
1566 if (proclen >= 0 && proclen < i && convlen < (75 - strlen ("=?utf-8?q?\?=")))
1567 proclen = i;
1568
1569 /* well, we probably have broken utf8, just copy it anyway what the heck */
1570 if (proclen == -1) {
1571 w (g_warning ("Appear to have truncated utf8 sequence"));
1572 proclen = inlen;
1573 }
1574
1575 memcpy (out, inptr, proclen);
1576 inptr += proclen;
1577 inlen -= proclen;
1578 out += proclen;
1579 } else {
1580 /* well we could do similar, but we can't (without undue effort), we'll just break it up into
1581 * hopefully-small-enough chunks, and leave it at that */
1582 convlen = MIN (inlen, CAMEL_FOLD_PREENCODED);
1583 p = inptr;
1584 if (camel_iconv (ic, &inptr, (gsize *) &convlen, &out, &outlen) == (gsize) -1 && errno != EINVAL) {
1585 w (g_warning ("Conversion problem: conversion truncated: %s", g_strerror (errno)));
1586 /* blah, we include it anyway, better than infinite loop ... */
1587 inptr += convlen;
1588 } else {
1589 /* make sure we flush out any shift state */
1590 camel_iconv (ic, NULL, NULL, &out, &outlen);
1591 }
1592 inlen -= (inptr - p);
1593 }
1594
1595 enclen = out - buffer;
1596
1597 if (enclen) {
1598 /* create token */
1599 out = ascii;
1600 if (first)
1601 first = 0;
1602 else
1603 *out++ = ' ';
1604 out += sprintf (out, "=?%s?Q?", type);
1605 out += quoted_encode ((guchar *) buffer, enclen, (guchar *) out, safemask);
1606 sprintf (out, "?=");
1607
1608 d (printf ("converted part = %s\n", ascii));
1609
1610 g_string_append (outstring, ascii);
1611 }
1612 }
1613
1614 if (ic != (GIConv) -1)
1615 camel_iconv_close (ic);
1616 }
1617
1618 static gchar *
header_encode_string_rfc2047(const guchar * in,gboolean include_lwsp)1619 header_encode_string_rfc2047 (const guchar *in,
1620 gboolean include_lwsp)
1621 {
1622 const guchar *inptr = in, *start, *word;
1623 gboolean last_was_encoded = FALSE;
1624 gboolean last_was_space = FALSE;
1625 const gchar *charset;
1626 gint encoding;
1627 GString *out;
1628
1629 g_return_val_if_fail (g_utf8_validate ((const gchar *) in, -1, NULL), NULL);
1630
1631 if (in == NULL)
1632 return NULL;
1633
1634 /* do a quick us-ascii check (the common case?) */
1635 while (*inptr) {
1636 if (*inptr > 127)
1637 break;
1638 inptr++;
1639 }
1640 if (*inptr == '\0')
1641 return g_strdup ((gchar *) in);
1642
1643 /* This gets each word out of the input, and checks to see what charset
1644 * can be used to encode it. */
1645 /* TODO: Work out when to merge subsequent words, or across word-parts */
1646 out = g_string_new ("");
1647 inptr = in;
1648 encoding = 0;
1649 word = NULL;
1650 start = inptr;
1651 while (inptr && *inptr) {
1652 gunichar c;
1653 const gchar *newinptr;
1654
1655 newinptr = g_utf8_next_char (inptr);
1656 c = g_utf8_get_char ((gchar *) inptr);
1657 if (newinptr == NULL || !g_unichar_validate (c)) {
1658 w (g_warning (
1659 "Invalid UTF-8 sequence encountered "
1660 "(pos %d, gchar '%c'): %s",
1661 (inptr - in), inptr[0], in));
1662 inptr++;
1663 continue;
1664 }
1665
1666 if (c < 256 && !include_lwsp && camel_mime_is_lwsp (c) && !last_was_space) {
1667 /* we've reached the end of a 'word' */
1668 if (word && !(last_was_encoded && encoding)) {
1669 /* output lwsp between non-encoded words */
1670 g_string_append_len (out, (const gchar *) start, word - start);
1671 start = word;
1672 }
1673
1674 switch (encoding) {
1675 case 0:
1676 g_string_append_len (out, (const gchar *) start, inptr - start);
1677 last_was_encoded = FALSE;
1678 break;
1679 case 1:
1680 if (last_was_encoded)
1681 g_string_append_c (out, ' ');
1682
1683 rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1684 last_was_encoded = TRUE;
1685 break;
1686 case 2:
1687 if (last_was_encoded)
1688 g_string_append_c (out, ' ');
1689
1690 if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1691 charset = "UTF-8";
1692 rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1693 last_was_encoded = TRUE;
1694 break;
1695 }
1696
1697 last_was_space = TRUE;
1698 start = inptr;
1699 word = NULL;
1700 encoding = 0;
1701 } else if (c > 127 && c < 256) {
1702 encoding = MAX (encoding, 1);
1703 last_was_space = FALSE;
1704 } else if (c >= 256) {
1705 encoding = MAX (encoding, 2);
1706 last_was_space = FALSE;
1707 } else if (include_lwsp || !camel_mime_is_lwsp (c)) {
1708 last_was_space = FALSE;
1709 }
1710
1711 if (!(c < 256 && !include_lwsp && camel_mime_is_lwsp (c)) && !word)
1712 word = inptr;
1713
1714 inptr = (const guchar *) newinptr;
1715 }
1716
1717 if (inptr - start) {
1718 if (word && !(last_was_encoded && encoding)) {
1719 g_string_append_len (out, (const gchar *) start, word - start);
1720 start = word;
1721 }
1722
1723 switch (encoding) {
1724 case 0:
1725 g_string_append_len (out, (const gchar *) start, inptr - start);
1726 break;
1727 case 1:
1728 if (last_was_encoded)
1729 g_string_append_c (out, ' ');
1730
1731 rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1732 break;
1733 case 2:
1734 if (last_was_encoded)
1735 g_string_append_c (out, ' ');
1736
1737 if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1738 charset = "UTF-8";
1739 rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1740 break;
1741 }
1742 }
1743
1744 return g_string_free (out, FALSE);
1745 }
1746
1747 /* TODO: Should this worry about quotes?? */
1748 /**
1749 * camel_header_encode_string:
1750 * @in: input string
1751 *
1752 * Encodes a 'text' header according to the rules of rfc2047.
1753 *
1754 * Returns: the rfc2047 encoded header
1755 **/
1756 gchar *
camel_header_encode_string(const guchar * in)1757 camel_header_encode_string (const guchar *in)
1758 {
1759 return header_encode_string_rfc2047 (in, FALSE);
1760 }
1761
1762 /* apply quoted-string rules to a string */
1763 static void
quote_word(GString * out,gboolean do_quotes,const gchar * start,gsize len)1764 quote_word (GString *out,
1765 gboolean do_quotes,
1766 const gchar *start,
1767 gsize len)
1768 {
1769 gint i, c;
1770
1771 /* TODO: What about folding on long lines? */
1772 if (do_quotes)
1773 g_string_append_c (out, '"');
1774 for (i = 0; i < len; i++) {
1775 c = *start++;
1776 if (c == '\"' || c == '\\' || c == '\r')
1777 g_string_append_c (out, '\\');
1778 g_string_append_c (out, c);
1779 }
1780 if (do_quotes)
1781 g_string_append_c (out, '"');
1782 }
1783
/* incrementing possibility for the word type; the order matters because
 * callers combine types with MAX() */
enum _phrase_word_t {
	WORD_ATOM,	/* only ASCII characters accepted by camel_mime_is_atom() */
	WORD_QSTRING,	/* ASCII, but with non-atom characters: emitted quoted */
	WORD_2047	/* has characters >= 128: emitted as rfc2047 encoded-word */
};

/* one word of a phrase, described as a span of the input string */
struct _phrase_word {
	const guchar *start, *end;	/* the word is the bytes [start, end) */
	enum _phrase_word_t type;
	gint encoding;	/* 0: ASCII only, 1: has chars in 128..255, 2: has chars >= 256 */
};
1796
1797 static gboolean
word_types_compatable(enum _phrase_word_t type1,enum _phrase_word_t type2)1798 word_types_compatable (enum _phrase_word_t type1,
1799 enum _phrase_word_t type2)
1800 {
1801 switch (type1) {
1802 case WORD_ATOM:
1803 return type2 == WORD_QSTRING;
1804 case WORD_QSTRING:
1805 return type2 != WORD_2047;
1806 case WORD_2047:
1807 return type2 == WORD_2047;
1808 default:
1809 return FALSE;
1810 }
1811 }
1812
1813 /* split the input into words with info about each word
1814 * merge common word types clean up */
1815 static GList *
header_encode_phrase_get_words(const guchar * in)1816 header_encode_phrase_get_words (const guchar *in)
1817 {
1818 const guchar *inptr = in, *start, *last;
1819 struct _phrase_word *word;
1820 enum _phrase_word_t type;
1821 gint encoding, count = 0;
1822 GList *words = NULL;
1823
1824 /* break the input into words */
1825 type = WORD_ATOM;
1826 last = inptr;
1827 start = inptr;
1828 encoding = 0;
1829 while (inptr && *inptr) {
1830 gunichar c;
1831 const gchar *newinptr;
1832
1833 newinptr = g_utf8_next_char (inptr);
1834 c = g_utf8_get_char ((gchar *) inptr);
1835
1836 if (!g_unichar_validate (c)) {
1837 w (g_warning (
1838 "Invalid UTF-8 sequence encountered "
1839 "(pos %d, gchar '%c'): %s",
1840 (inptr - in), inptr[0], in));
1841 inptr++;
1842 continue;
1843 }
1844
1845 inptr = (const guchar *) newinptr;
1846 if (g_unichar_isspace (c)) {
1847 if (count > 0) {
1848 word = g_new0 (struct _phrase_word, 1);
1849 word->start = start;
1850 word->end = last;
1851 word->type = type;
1852 word->encoding = encoding;
1853 words = g_list_append (words, word);
1854 count = 0;
1855 }
1856
1857 start = inptr;
1858 type = WORD_ATOM;
1859 encoding = 0;
1860 } else {
1861 count++;
1862 if (c < 128) {
1863 if (!camel_mime_is_atom (c))
1864 type = MAX (type, WORD_QSTRING);
1865 } else if (c > 127 && c < 256) {
1866 type = WORD_2047;
1867 encoding = MAX (encoding, 1);
1868 } else if (c >= 256) {
1869 type = WORD_2047;
1870 encoding = MAX (encoding, 2);
1871 }
1872 }
1873
1874 last = inptr;
1875 }
1876
1877 if (count > 0) {
1878 word = g_new0 (struct _phrase_word, 1);
1879 word->start = start;
1880 word->end = last;
1881 word->type = type;
1882 word->encoding = encoding;
1883 words = g_list_append (words, word);
1884 }
1885
1886 return words;
1887 }
1888
1889 #define MERGED_WORD_LT_FOLDLEN(wordlen, type) ((type) == WORD_2047 ? (wordlen) < CAMEL_FOLD_PREENCODED : (wordlen) < (CAMEL_FOLD_SIZE - 8))
1890
1891 static gboolean
header_encode_phrase_merge_words(GList ** wordsp)1892 header_encode_phrase_merge_words (GList **wordsp)
1893 {
1894 GList *wordl, *nextl, *words = *wordsp;
1895 struct _phrase_word *word, *next;
1896 gboolean merged = FALSE;
1897
1898 /* scan the list, checking for words of similar types that can be merged */
1899 wordl = words;
1900 while (wordl) {
1901 word = wordl->data;
1902 nextl = g_list_next (wordl);
1903
1904 while (nextl) {
1905 next = nextl->data;
1906 /* merge nodes of the same type AND we are not creating too long a string */
1907 if (word_types_compatable (word->type, next->type)) {
1908 if (MERGED_WORD_LT_FOLDLEN (next->end - word->start, MAX (word->type, next->type))) {
1909 /* the resulting word type is the MAX of the 2 types */
1910 word->type = MAX (word->type, next->type);
1911 word->encoding = MAX (word->encoding, next->encoding);
1912 word->end = next->end;
1913 words = g_list_remove_link (words, nextl);
1914 g_list_free_1 (nextl);
1915 g_free (next);
1916
1917 nextl = g_list_next (wordl);
1918
1919 merged = TRUE;
1920 } else {
1921 /* if it is going to be too long, make sure we include the
1922 * separating whitespace */
1923 word->end = next->start;
1924 break;
1925 }
1926 } else {
1927 break;
1928 }
1929 }
1930
1931 wordl = g_list_next (wordl);
1932 }
1933
1934 *wordsp = words;
1935
1936 return merged;
1937 }
1938
1939 /* encodes a phrase sequence (different quoting/encoding rules to strings) */
1940 /**
1941 * camel_header_encode_phrase:
1942 * @in: header to encode
1943 *
1944 * Encodes a 'phrase' header according to the rules in rfc2047.
1945 *
1946 * Returns: the encoded 'phrase'
1947 **/
1948 gchar *
camel_header_encode_phrase(const guchar * in)1949 camel_header_encode_phrase (const guchar *in)
1950 {
1951 struct _phrase_word *word = NULL, *last_word = NULL;
1952 GList *words, *wordl;
1953 const gchar *charset;
1954 GString *out;
1955
1956 if (in == NULL)
1957 return NULL;
1958
1959 words = header_encode_phrase_get_words (in);
1960 if (!words)
1961 return NULL;
1962
1963 while (header_encode_phrase_merge_words (&words))
1964 ;
1965
1966 out = g_string_new ("");
1967
1968 /* output words now with spaces between them */
1969 wordl = words;
1970 while (wordl) {
1971 const gchar *start;
1972 gsize len;
1973
1974 word = wordl->data;
1975
1976 /* append correct number of spaces between words */
1977 if (last_word && !(last_word->type == WORD_2047 && word->type == WORD_2047)) {
1978 /* one or both of the words are not encoded so we write the spaces out untouched */
1979 len = word->start - last_word->end;
1980 g_string_append_len (out, (gchar *) last_word->end, len);
1981 }
1982
1983 switch (word->type) {
1984 case WORD_ATOM:
1985 g_string_append_len (out, (gchar *) word->start, word->end - word->start);
1986 break;
1987 case WORD_QSTRING:
1988 quote_word (out, TRUE, (gchar *) word->start, word->end - word->start);
1989 break;
1990 case WORD_2047:
1991 if (last_word && last_word->type == WORD_2047) {
1992 /* include the whitespace chars between these 2 words in the
1993 * resulting rfc2047 encoded word. */
1994 len = word->end - last_word->end;
1995 start = (const gchar *) last_word->end;
1996
1997 /* encoded words need to be separated by linear whitespace */
1998 g_string_append_c (out, ' ');
1999 } else {
2000 len = word->end - word->start;
2001 start = (const gchar *) word->start;
2002 }
2003
2004 if (word->encoding == 1) {
2005 rfc2047_encode_word (out, start, len, "ISO-8859-1", CAMEL_MIME_IS_PSAFE);
2006 } else {
2007 if (!(charset = camel_charset_best (start, len)))
2008 charset = "UTF-8";
2009 rfc2047_encode_word (out, start, len, charset, CAMEL_MIME_IS_PSAFE);
2010 }
2011 break;
2012 }
2013
2014 g_free (last_word);
2015 wordl = g_list_next (wordl);
2016
2017 last_word = word;
2018 }
2019
2020 /* and we no longer need the list */
2021 g_free (word);
2022 g_list_free (words);
2023
2024 return g_string_free (out, FALSE);
2025 }
2026
2027 /* these are all internal parser functions */
2028
2029 static gchar *
decode_token(const gchar ** in)2030 decode_token (const gchar **in)
2031 {
2032 const gchar *inptr = *in;
2033 const gchar *start;
2034
2035 header_decode_lwsp (&inptr);
2036 start = inptr;
2037 while (camel_mime_is_ttoken (*inptr))
2038 inptr++;
2039 if (inptr > start) {
2040 *in = inptr;
2041 return g_strndup (start, inptr - start);
2042 } else {
2043 return NULL;
2044 }
2045 }
2046
2047 /**
2048 * camel_header_token_decode:
2049 * @in: input string
2050 *
2051 * Gets the first token in the string according to the rules of
2052 * rfc0822.
2053 *
2054 * Returns: a new string containing the first token in @in
2055 **/
2056 gchar *
camel_header_token_decode(const gchar * in)2057 camel_header_token_decode (const gchar *in)
2058 {
2059 if (in == NULL)
2060 return NULL;
2061
2062 return decode_token (&in);
2063 }
2064
2065 /*
2066 * <"> * ( <any gchar except <"> \, cr / \ <any char> ) <">
2067 */
2068 static gchar *
header_decode_quoted_string(const gchar ** in)2069 header_decode_quoted_string (const gchar **in)
2070 {
2071 const gchar *inptr = *in;
2072 gchar *out = NULL, *outptr;
2073 gsize outlen;
2074 gint c;
2075
2076 header_decode_lwsp (&inptr);
2077 if (*inptr == '"') {
2078 const gchar *intmp;
2079 gint skip = 0;
2080
2081 /* first, calc length */
2082 inptr++;
2083 intmp = inptr;
2084 while ( (c = *intmp++) && c!= '"') {
2085 if (c == '\\' && *intmp) {
2086 intmp++;
2087 skip++;
2088 } else if (c == '\n') {
2089 skip++;
2090 }
2091 }
2092 outlen = intmp - inptr - skip;
2093 out = outptr = g_malloc (outlen + 1);
2094 while ( (c = *inptr) && c!= '"') {
2095 inptr++;
2096 if (c == '\\' && *inptr) {
2097 c = *inptr++;
2098 } else if (c == '\n') {
2099 continue;
2100 }
2101 *outptr++ = c;
2102 }
2103 if (c)
2104 inptr++;
2105 *outptr = '\0';
2106 }
2107 *in = inptr;
2108 return out;
2109 }
2110
2111 static gchar *
header_decode_atom(const gchar ** in)2112 header_decode_atom (const gchar **in)
2113 {
2114 const gchar *inptr = *in, *start;
2115
2116 header_decode_lwsp (&inptr);
2117 start = inptr;
2118 while (camel_mime_is_atom (*inptr))
2119 inptr++;
2120 *in = inptr;
2121 if (inptr > start)
2122 return g_strndup (start, inptr - start);
2123 else
2124 return NULL;
2125 }
2126
2127 static gboolean
extract_rfc2047_encoded_word(const gchar ** in,gchar ** word)2128 extract_rfc2047_encoded_word (const gchar **in,
2129 gchar **word)
2130 {
2131 const gchar *inptr = *in, *start;
2132
2133 header_decode_lwsp (&inptr);
2134 start = inptr;
2135
2136 if (!strncmp (inptr, "=?", 2)) {
2137 inptr += 2;
2138
2139 /* skip past the charset (if one is even declared, sigh) */
2140 while (*inptr && *inptr != '?') {
2141 inptr++;
2142 }
2143
2144 /* sanity check encoding type */
2145 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
2146 return FALSE;
2147
2148 inptr += 3;
2149
2150 /* find the end of the rfc2047 encoded word token */
2151 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
2152 inptr++;
2153 }
2154
2155 if (!strncmp (inptr, "?=", 2)) {
2156 inptr += 2;
2157
2158 *in = inptr;
2159 *word = g_strndup (start, inptr - start);
2160
2161 return TRUE;
2162 }
2163 }
2164
2165 return FALSE;
2166 }
2167
2168 static gchar *
header_decode_word(const gchar ** in)2169 header_decode_word (const gchar **in)
2170 {
2171 const gchar *inptr = *in;
2172 gchar *word = NULL;
2173
2174 header_decode_lwsp (&inptr);
2175 *in = inptr;
2176
2177 if (*inptr == '"') {
2178 return header_decode_quoted_string (in);
2179 } else if (*inptr == '=' && inptr[1] == '?' && extract_rfc2047_encoded_word (in, &word) && word) {
2180 return word;
2181 } else {
2182 return header_decode_atom (in);
2183 }
2184 }
2185
2186 static gchar *
header_decode_value(const gchar ** in)2187 header_decode_value (const gchar **in)
2188 {
2189 const gchar *inptr = *in;
2190
2191 header_decode_lwsp (&inptr);
2192 if (*inptr == '"') {
2193 d (printf ("decoding quoted string\n"));
2194 return header_decode_quoted_string (in);
2195 } else if (camel_mime_is_ttoken (*inptr)) {
2196 d (printf ("decoding token\n"));
2197 /* this may not have the right specials for all params? */
2198 return decode_token (in);
2199 }
2200 return NULL;
2201 }
2202
2203 /* should this return -1 for no int? */
2204
2205 /**
2206 * camel_header_decode_int:
2207 * @in: pointer to input string
2208 *
2209 * Extracts an integer token from @in and updates the pointer to point
2210 * to after the end of the integer token (sort of like strtol).
2211 *
2212 * Returns: the gint value
2213 **/
2214 gint
camel_header_decode_int(const gchar ** in)2215 camel_header_decode_int (const gchar **in)
2216 {
2217 const gchar *inptr = *in;
2218 gint c, v = 0;
2219
2220 header_decode_lwsp (&inptr);
2221 while ( (c=*inptr++ & 0xff)
2222 && isdigit (c) ) {
2223 v = v * 10 + (c - '0');
2224 }
2225 *in = inptr-1;
2226 return v;
2227 }
2228
2229 #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
2230
2231 static gchar *
hex_decode(const gchar * in,gsize len)2232 hex_decode (const gchar *in,
2233 gsize len)
2234 {
2235 const guchar *inend = (const guchar *) (in + len);
2236 guchar *inptr, *outptr;
2237 gchar *outbuf;
2238
2239 outbuf = (gchar *) g_malloc (len + 1);
2240 outptr = (guchar *) outbuf;
2241
2242 inptr = (guchar *) in;
2243 while (inptr < inend) {
2244 if (*inptr == '%') {
2245 if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
2246 *outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
2247 inptr += 3;
2248 } else
2249 *outptr++ = *inptr++;
2250 } else
2251 *outptr++ = *inptr++;
2252 }
2253
2254 *outptr = '\0';
2255
2256 return outbuf;
2257 }
2258
2259 /* Tries to convert @in @from charset @to charset. Any failure, we get no data out rather than partial conversion */
2260 static gchar *
header_convert(const gchar * to,const gchar * from,const gchar * in,gsize inlen)2261 header_convert (const gchar *to,
2262 const gchar *from,
2263 const gchar *in,
2264 gsize inlen)
2265 {
2266 GIConv ic;
2267 gsize outlen, ret;
2268 gchar *outbuf, *outbase, *result = NULL;
2269
2270 ic = camel_iconv_open (to, from);
2271 if (ic == (GIConv) -1)
2272 return NULL;
2273
2274 outlen = inlen * 6 + 16;
2275 outbuf = outbase = g_malloc (outlen);
2276
2277 ret = camel_iconv (ic, &in, &inlen, &outbuf, &outlen);
2278 if (ret != (gsize) -1) {
2279 camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
2280 *outbuf = '\0';
2281 result = g_strdup (outbase);
2282 }
2283 camel_iconv_close (ic);
2284 g_free (outbase);
2285
2286 return result;
2287 }
2288
2289 /* an rfc2184 encoded string looks something like:
2290 * us-ascii'en'This%20is%20even%20more%20
2291 */
2292
2293 static gchar *
rfc2184_decode(const gchar * in,gsize len)2294 rfc2184_decode (const gchar *in,
2295 gsize len)
2296 {
2297 const gchar *inptr = in;
2298 const gchar *inend = in + len;
2299 const gchar *charset;
2300 gchar *decoded, *decword, *encoding;
2301
2302 inptr = memchr (inptr, '\'', len);
2303 if (!inptr)
2304 return NULL;
2305
2306 encoding = g_alloca (inptr - in + 1);
2307 memcpy (encoding, in, inptr - in);
2308 encoding[inptr - in] = 0;
2309 charset = camel_iconv_charset_name (encoding);
2310
2311 inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
2312 if (!inptr)
2313 return NULL;
2314 inptr++;
2315 if (inptr >= inend)
2316 return NULL;
2317
2318 decword = hex_decode (inptr, inend - inptr);
2319 decoded = header_convert ("UTF-8", charset, decword, strlen (decword));
2320 g_free (decword);
2321
2322 return decoded;
2323 }
2324
2325 /**
2326 * camel_header_param:
2327 * @params: parameters
2328 * @name: name of param to find
2329 *
2330 * Searches @params for a param named @name and gets the value.
2331 *
2332 * Returns: (transfer none): the value of the @name param
2333 **/
2334 gchar *
camel_header_param(struct _camel_header_param * params,const gchar * name)2335 camel_header_param (struct _camel_header_param *params,
2336 const gchar *name)
2337 {
2338 while (params && params->name &&
2339 g_ascii_strcasecmp (params->name, name) != 0)
2340 params = params->next;
2341 if (params)
2342 return params->value;
2343
2344 return NULL;
2345 }
2346
2347 /**
2348 * camel_header_set_param:
2349 * @paramsp: poinetr to a list of params
2350 * @name: name of param to set
2351 * @value: value to set
2352 *
2353 * Set a parameter in the list.
2354 *
2355 * Returns: (transfer none): the set param
2356 **/
2357 struct _camel_header_param *
camel_header_set_param(struct _camel_header_param ** l,const gchar * name,const gchar * value)2358 camel_header_set_param (struct _camel_header_param **l,
2359 const gchar *name,
2360 const gchar *value)
2361 {
2362 struct _camel_header_param *p = (struct _camel_header_param *) l, *pn;
2363
2364 if (name == NULL)
2365 return NULL;
2366
2367 while (p->next) {
2368 pn = p->next;
2369 if (!g_ascii_strcasecmp (pn->name, name)) {
2370 g_free (pn->value);
2371 if (value) {
2372 pn->value = g_strdup (value);
2373 return pn;
2374 } else {
2375 p->next = pn->next;
2376 g_free (pn->name);
2377 g_free (pn);
2378 return NULL;
2379 }
2380 }
2381 p = pn;
2382 }
2383
2384 if (value == NULL)
2385 return NULL;
2386
2387 pn = g_malloc (sizeof (*pn));
2388 pn->next = NULL;
2389 pn->name = g_strdup (name);
2390 pn->value = g_strdup (value);
2391 p->next = pn;
2392
2393 return pn;
2394 }
2395
2396 /**
2397 * camel_content_type_param:
2398 * @content_type: a #CamelContentType
2399 * @name: name of param to find
2400 *
2401 * Searches the params on s #CamelContentType for a param named @name
2402 * and gets the value.
2403 *
2404 * Returns: the value of the @name param
2405 **/
2406 const gchar *
camel_content_type_param(CamelContentType * t,const gchar * name)2407 camel_content_type_param (CamelContentType *t,
2408 const gchar *name)
2409 {
2410 if (t == NULL)
2411 return NULL;
2412 return camel_header_param (t->params, name);
2413 }
2414
2415 /**
2416 * camel_content_type_set_param:
2417 * @content_type: a #CamelContentType
2418 * @name: name of param to set
2419 * @value: value of param to set
2420 *
2421 * Set a parameter on @content_type.
2422 **/
2423 void
camel_content_type_set_param(CamelContentType * t,const gchar * name,const gchar * value)2424 camel_content_type_set_param (CamelContentType *t,
2425 const gchar *name,
2426 const gchar *value)
2427 {
2428 g_return_if_fail (t != NULL);
2429
2430 camel_header_set_param (&t->params, name, value);
2431 }
2432
2433 /**
2434 * camel_content_type_is:
2435 * @content_type: A content type specifier, or %NULL.
2436 * @type: A type to check against.
2437 * @subtype: A subtype to check against, or "*" to match any subtype.
2438 *
2439 * The subtype of "*" will match any subtype. If @ct is %NULL, then
2440 * it will match the type "text/plain".
2441 *
2442 * Returns: %TRUE if the content type @ct is of type @type/@subtype or
2443 * %FALSE otherwise
2444 **/
2445 gboolean
camel_content_type_is(const CamelContentType * ct,const gchar * type,const gchar * subtype)2446 camel_content_type_is (const CamelContentType *ct,
2447 const gchar *type,
2448 const gchar *subtype)
2449 {
2450 /* no type == text/plain or text/"*" */
2451 if (ct == NULL || (ct->type == NULL && ct->subtype == NULL)) {
2452 return (!g_ascii_strcasecmp (type, "text")
2453 && (!g_ascii_strcasecmp (subtype, "plain")
2454 || !strcmp (subtype, "*")));
2455 }
2456
2457 return (ct->type != NULL
2458 && (!g_ascii_strcasecmp (ct->type, type)
2459 && ((ct->subtype != NULL
2460 && !g_ascii_strcasecmp (ct->subtype, subtype))
2461 || !strcmp ("*", subtype))));
2462 }
2463
2464 /**
2465 * camel_header_param_list_free:
2466 * @params: a list of params
2467 *
2468 * Free the list of params.
2469 **/
2470 void
camel_header_param_list_free(struct _camel_header_param * p)2471 camel_header_param_list_free (struct _camel_header_param *p)
2472 {
2473 struct _camel_header_param *n;
2474
2475 while (p) {
2476 n = p->next;
2477 g_free (p->name);
2478 g_free (p->value);
2479 g_free (p);
2480 p = n;
2481 }
2482 }
2483
2484 /**
2485 * camel_content_type_new:
2486 * @type: the major type of the new content-type
2487 * @subtype: the subtype
2488 *
2489 * Create a new #CamelContentType.
2490 *
2491 * Returns: the new #CamelContentType
2492 **/
2493 CamelContentType *
camel_content_type_new(const gchar * type,const gchar * subtype)2494 camel_content_type_new (const gchar *type,
2495 const gchar *subtype)
2496 {
2497 CamelContentType *t;
2498
2499 t = g_slice_new (CamelContentType);
2500 t->type = g_strdup (type);
2501 t->subtype = g_strdup (subtype);
2502 t->params = NULL;
2503 t->refcount = 1;
2504
2505 return t;
2506 }
2507
2508 /**
2509 * camel_content_type_ref:
2510 * @content_type: a #CamelContentType
2511 *
2512 * Refs the content type.
2513 **/
2514 CamelContentType *
camel_content_type_ref(CamelContentType * ct)2515 camel_content_type_ref (CamelContentType *ct)
2516 {
2517 if (ct)
2518 ct->refcount++;
2519
2520 return ct;
2521 }
2522
2523 /**
2524 * camel_content_type_unref:
2525 * @content_type: a #CamelContentType
2526 *
2527 * Unrefs, and potentially frees, the content type.
2528 **/
2529 void
camel_content_type_unref(CamelContentType * ct)2530 camel_content_type_unref (CamelContentType *ct)
2531 {
2532 if (ct) {
2533 if (ct->refcount <= 1) {
2534 camel_header_param_list_free (ct->params);
2535 g_free (ct->type);
2536 g_free (ct->subtype);
2537 g_slice_free (CamelContentType, ct);
2538 ct = NULL;
2539 } else {
2540 ct->refcount--;
2541 }
2542 }
2543 }
2544
2545 /* for decoding email addresses, canonically */
2546 static gchar *
header_decode_domain(const gchar ** in)2547 header_decode_domain (const gchar **in)
2548 {
2549 const gchar *inptr = *in;
2550 gint go = TRUE;
2551 GString *domain = g_string_new ("");
2552
2553 /* domain ref | domain literal */
2554 header_decode_lwsp (&inptr);
2555 while (go) {
2556 if (*inptr == '[') { /* domain literal */
2557 g_string_append_c (domain, '[');
2558 inptr++;
2559 header_decode_lwsp (&inptr);
2560 while (*inptr && camel_mime_is_dtext (*inptr)) {
2561 g_string_append_c (domain, *inptr);
2562 inptr++;
2563 }
2564 if (*inptr == ']') {
2565 g_string_append_c (domain, ']');
2566 inptr++;
2567 } else {
2568 w (g_warning ("closing ']' not found in domain: %s", *in));
2569 }
2570 } else {
2571 gchar *a = header_decode_atom (&inptr);
2572 if (a) {
2573 g_string_append (domain, a);
2574 g_free (a);
2575 } else {
2576 w (g_warning ("missing atom from domain-ref"));
2577 break;
2578 }
2579 }
2580 header_decode_lwsp (&inptr);
2581 if (*inptr == '.') { /* next sub-domain? */
2582 g_string_append_c (domain, '.');
2583 inptr++;
2584 header_decode_lwsp (&inptr);
2585 } else
2586 go = FALSE;
2587 }
2588
2589 *in = inptr;
2590
2591 return g_string_free (domain, FALSE);
2592 }
2593
2594 static gchar *
header_decode_addrspec(const gchar ** in)2595 header_decode_addrspec (const gchar **in)
2596 {
2597 const gchar *inptr = *in;
2598 gchar *word;
2599 GString *addr = g_string_new ("");
2600
2601 header_decode_lwsp (&inptr);
2602
2603 /* addr-spec */
2604 word = header_decode_word (&inptr);
2605 if (word) {
2606 g_string_append (addr, word);
2607 header_decode_lwsp (&inptr);
2608 g_free (word);
2609 while (*inptr == '.' && word) {
2610 inptr++;
2611 g_string_append_c (addr, '.');
2612 word = header_decode_word (&inptr);
2613 if (word) {
2614 g_string_append (addr, word);
2615 header_decode_lwsp (&inptr);
2616 g_free (word);
2617 } else {
2618 w (g_warning ("Invalid address spec: %s", *in));
2619 }
2620 }
2621 if (*inptr == '@') {
2622 inptr++;
2623 g_string_append_c (addr, '@');
2624 word = header_decode_domain (&inptr);
2625 if (word) {
2626 g_string_append (addr, word);
2627 g_free (word);
2628 } else {
2629 w (g_warning ("Invalid address, missing domain: %s", *in));
2630 }
2631 } else {
2632 w (g_warning ("Invalid addr-spec, missing @: %s", *in));
2633 }
2634 } else {
2635 w (g_warning ("invalid addr-spec, no local part"));
2636 g_string_free (addr, TRUE);
2637
2638 return NULL;
2639 }
2640
2641 /* FIXME: return null on error? */
2642
2643 *in = inptr;
2644 return g_string_free (addr, FALSE);
2645 }
2646
2647 /*
2648 * address:
2649 * word *('.' word) @ domain |
2650 * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain |
2651 *
2652 * 1 * word ':'[ word ... etc (mailbox, as above) ] ';'
2653 */
2654
2655 /* mailbox:
2656 * word *( '.' word ) '@' domain
2657 * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain
2658 * */
2659
2660 static CamelHeaderAddress *
header_decode_mailbox(const gchar ** in,const gchar * charset)2661 header_decode_mailbox (const gchar **in,
2662 const gchar *charset)
2663 {
2664 const gchar *inptr = *in;
2665 gchar *pre;
2666 gint closeme = FALSE;
2667 GString *addr;
2668 GString *name = NULL;
2669 CamelHeaderAddress *address = NULL;
2670 const gchar *comment = NULL;
2671
2672 addr = g_string_new ("");
2673
2674 start:
2675 /* for each address */
2676 pre = header_decode_word (&inptr);
2677 header_decode_lwsp (&inptr);
2678 if (!(*inptr == '.' || *inptr == '@' || *inptr == ',' || *inptr == '\0')) {
2679 /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */
2680 if (!name)
2681 name = g_string_new ("");
2682 while (pre) {
2683 gchar *text, *last;
2684
2685 /* perform internationalised decoding, and append */
2686 text = header_decode_text (pre, FALSE, charset);
2687 g_string_append (name, text);
2688 last = pre;
2689 g_free (text);
2690
2691 pre = header_decode_word (&inptr);
2692 if (pre) {
2693 gsize l = strlen (last);
2694 gsize p = strlen (pre);
2695
2696 /* dont append ' ' between sucsessive encoded words */
2697 if ((l > 6 && last[l - 2] == '?' && last[l - 1] == '=')
2698 && (p > 6 && pre[0] == '=' && pre[1] == '?')) {
2699 /* dont append ' ' */
2700 } else {
2701 g_string_append_c (name, ' ');
2702 }
2703 } else {
2704 /* Fix for stupidly-broken-mailers that like to put '.''s in names unquoted */
2705 /* see bug #8147 */
2706 while (!pre && *inptr && *inptr != '<') {
2707 w (g_warning ("Working around stupid mailer bug #5: unescaped characters in names"));
2708 g_string_append_c (name, *inptr++);
2709 pre = header_decode_word (&inptr);
2710 }
2711 }
2712 g_free (last);
2713 }
2714 header_decode_lwsp (&inptr);
2715 if (*inptr == '<') {
2716 closeme = TRUE;
2717 try_address_again:
2718 inptr++;
2719 header_decode_lwsp (&inptr);
2720 if (*inptr == '@') {
2721 while (*inptr == '@') {
2722 inptr++;
2723 header_decode_domain (&inptr);
2724 header_decode_lwsp (&inptr);
2725 if (*inptr == ',') {
2726 inptr++;
2727 header_decode_lwsp (&inptr);
2728 }
2729 }
2730 if (*inptr == ':') {
2731 inptr++;
2732 } else {
2733 w (g_warning ("broken route-address, missing ':': %s", *in));
2734 }
2735 }
2736 pre = header_decode_word (&inptr);
2737 /*header_decode_lwsp(&inptr);*/
2738 } else {
2739 w (g_warning ("broken address? %s", *in));
2740 }
2741 }
2742
2743 if (pre) {
2744 g_string_append (addr, pre);
2745 } else {
2746 w (g_warning ("No local-part for email address: %s", *in));
2747 }
2748
2749 /* should be at word '.' localpart */
2750 while (*inptr == '.' && pre) {
2751 inptr++;
2752 g_free (pre);
2753 pre = header_decode_word (&inptr);
2754 g_string_append_c (addr, '.');
2755 if (pre)
2756 g_string_append (addr, pre);
2757 comment = inptr;
2758 header_decode_lwsp (&inptr);
2759 }
2760 g_free (pre);
2761
2762 /* now at '@' domain part */
2763 if (*inptr == '@') {
2764 gchar *dom;
2765
2766 inptr++;
2767 g_string_append_c (addr, '@');
2768 comment = inptr;
2769 dom = header_decode_domain (&inptr);
2770 g_string_append (addr, dom);
2771 g_free (dom);
2772 } else if (*inptr != '>' || !closeme) {
2773 /* If we get a <, the address was probably a name part, lets try again shall we? */
2774 /* Another fix for seriously-broken-mailers */
2775 if (*inptr && *inptr != ',') {
2776 gchar *text;
2777 const gchar *name_part;
2778 gboolean in_quote;
2779
2780 w (g_warning ("We didn't get an '@' where we expected in '%s', trying again", *in));
2781 w (g_warning ("Name is '%s', Addr is '%s' we're at '%s'\n", name ? name->str:"<UNSET>", addr->str, inptr));
2782
2783 /* need to keep *inptr, as try_address_again will drop the current character */
2784 if (*inptr == '<')
2785 closeme = TRUE;
2786 else
2787 g_string_append_c (addr, *inptr);
2788
2789 name_part = *in;
2790 in_quote = FALSE;
2791 while (*name_part && *name_part != ',') {
2792 if (*name_part == '\"')
2793 in_quote = !in_quote;
2794 else if (!in_quote && *name_part == '<')
2795 break;
2796 name_part++;
2797 }
2798
2799 if (*name_part == '<' && ((!strchr (name_part, ',') && strchr (name_part, '>')) || (strchr (name_part, ',') > strchr (name_part, '>')))) {
2800 /* it's of a form "display-name <addr-spec>" */
2801 if (name)
2802 g_string_free (name, TRUE);
2803 name = NULL;
2804 g_string_free (addr, TRUE);
2805
2806 if (name_part == *in)
2807 addr = g_string_new ("");
2808 else
2809 addr = g_string_new_len (*in, name_part - *in - (camel_mime_is_lwsp (name_part[-1]) ? 1 : 0));
2810 }
2811
2812 /* check for address is encoded word ... */
2813 text = header_decode_text (addr->str, FALSE, charset);
2814 if (name == NULL) {
2815 name = addr;
2816 addr = g_string_new ("");
2817 if (text) {
2818 g_string_truncate (name, 0);
2819 g_string_append (name, text);
2820 }
2821 }/* else {
2822 g_string_append (name, text ? text : addr->str);
2823 g_string_truncate (addr, 0);
2824 }*/
2825 g_free (text);
2826
2827 /* or maybe that we've added up a bunch of broken bits to make an encoded word */
2828 if ((text = rfc2047_decode_word (name->str, name->len, charset))) {
2829 g_string_truncate (name, 0);
2830 g_string_append (name, text);
2831 g_free (text);
2832 }
2833
2834 goto try_address_again;
2835 }
2836 w (g_warning ("invalid address, no '@' domain part at %c: %s", *inptr, *in));
2837 }
2838
2839 if (closeme) {
2840 header_decode_lwsp (&inptr);
2841 if (*inptr == '>') {
2842 inptr++;
2843 } else {
2844 w (g_warning ("invalid route address, no closing '>': %s", *in));
2845 }
2846 } else if (name == NULL && comment != NULL && inptr>comment) { /* check for comment after address */
2847 gchar *text, *tmp;
2848 const gchar *comstart, *comend;
2849
2850 /* this is a bit messy, we go from the last known position, because
2851 * decode_domain/etc skip over any comments on the way */
2852 /* FIXME: This wont detect comments inside the domain itself,
2853 * but nobody seems to use that feature anyway ... */
2854
2855 d (printf ("checking for comment from '%s'\n", comment));
2856
2857 comstart = strchr (comment, '(');
2858 if (comstart) {
2859 comstart++;
2860 header_decode_lwsp (&inptr);
2861 comend = inptr - 1;
2862 while (comend > comstart && comend[0] != ')')
2863 comend--;
2864
2865 if (comend > comstart) {
2866 d (printf (" looking at subset '%.*s'\n", comend - comstart, comstart));
2867 tmp = g_strndup (comstart, comend - comstart);
2868 text = header_decode_text (tmp, FALSE, charset);
2869 name = g_string_new (text);
2870 g_free (tmp);
2871 g_free (text);
2872 }
2873 }
2874 }
2875
2876 header_decode_lwsp (&inptr);
2877
2878 if (*inptr && *inptr != ',') {
2879 if (addr->len > 0) {
2880 if (!name) {
2881 name = g_string_sized_new (addr->len + 5);
2882 } else {
2883 g_string_append_c (name, ' ');
2884 }
2885
2886 g_string_append_c (name, '<');
2887 g_string_append (name, addr->str);
2888 g_string_append_c (name, '>');
2889 g_string_append_c (name, ' ');
2890
2891 g_string_truncate (addr, 0);
2892 }
2893
2894 goto start;
2895 }
2896
2897 *in = inptr;
2898
2899 if (name) {
2900 /* Trim any trailing spaces */
2901 while (name->len > 0 && name->str[name->len - 1] == ' ') {
2902 g_string_truncate (name, name->len - 1);
2903 }
2904 }
2905
2906 if (addr->len > 0) {
2907 if (!g_utf8_validate (addr->str, addr->len, NULL)) {
2908 /* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */
2909 const gchar *locale_charset;
2910 GString *out;
2911
2912 locale_charset = camel_iconv_locale_charset ();
2913
2914 out = g_string_new ("");
2915
2916 if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset))
2917 && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset)))
2918 append_latin1 (out, addr->str, addr->len);
2919
2920 g_string_free (addr, TRUE);
2921 addr = out;
2922 }
2923
2924 if (!name) {
2925 gchar *text;
2926
2927 text = rfc2047_decode_word (addr->str, addr->len, charset);
2928 if (text) {
2929 g_string_truncate (addr, 0);
2930 g_string_append (addr, text);
2931 g_free (text);
2932
2933 make_string_utf8_valid (addr->str, addr->len);
2934 }
2935
2936 } else {
2937 make_string_utf8_valid (name->str, name->len);
2938 }
2939
2940 address = camel_header_address_new_name (name ? name->str : "", addr->str);
2941 } else if (name) {
2942 /* A name-only address, might be something wrong, but include it anyway */
2943 make_string_utf8_valid (name->str, name->len);
2944 address = camel_header_address_new_name (name->str, "");
2945 }
2946
2947 d (printf ("got mailbox: %s\n", addr->str));
2948
2949 g_string_free (addr, TRUE);
2950 if (name)
2951 g_string_free (name, TRUE);
2952
2953 return address;
2954 }
2955
2956 static CamelHeaderAddress *
header_decode_address(const gchar ** in,const gchar * charset)2957 header_decode_address (const gchar **in,
2958 const gchar *charset)
2959 {
2960 const gchar *inptr = *in;
2961 gchar *pre;
2962 GString *group = g_string_new ("");
2963 CamelHeaderAddress *addr = NULL, *member;
2964
2965 /* pre-scan, trying to work out format, discard results */
2966 header_decode_lwsp (&inptr);
2967 while ((pre = header_decode_word (&inptr))) {
2968 g_string_append (group, pre);
2969 g_string_append_c (group, ' ');
2970 g_free (pre);
2971 }
2972 header_decode_lwsp (&inptr);
2973 if (*inptr == ':') {
2974 d (printf ("group detected: %s\n", group->str));
2975 addr = camel_header_address_new_group (group->str);
2976 /* that was a group spec, scan mailbox's */
2977 inptr++;
2978 /* FIXME: check rfc 2047 encodings of words, here or above in the loop */
2979 header_decode_lwsp (&inptr);
2980 if (*inptr != ';') {
2981 gint go = TRUE;
2982 do {
2983 member = header_decode_mailbox (&inptr, charset);
2984 if (member)
2985 camel_header_address_add_member (addr, member);
2986 header_decode_lwsp (&inptr);
2987 if (*inptr == ',')
2988 inptr++;
2989 else
2990 go = FALSE;
2991 } while (go);
2992 if (*inptr == ';') {
2993 inptr++;
2994 } else {
2995 w (g_warning ("Invalid group spec, missing closing ';': %s", *in));
2996 }
2997 } else {
2998 inptr++;
2999 }
3000 *in = inptr;
3001 } else {
3002 addr = header_decode_mailbox (in, charset);
3003 }
3004
3005 g_string_free (group, TRUE);
3006
3007 return addr;
3008 }
3009
3010 static gchar *
header_msgid_decode_internal(const gchar ** in)3011 header_msgid_decode_internal (const gchar **in)
3012 {
3013 const gchar *inptr = *in;
3014 gchar *msgid = NULL;
3015
3016 d (printf ("decoding Message-ID: '%s'\n", *in));
3017
3018 header_decode_lwsp (&inptr);
3019 if (*inptr == '<') {
3020 inptr++;
3021 header_decode_lwsp (&inptr);
3022 msgid = header_decode_addrspec (&inptr);
3023 if (msgid) {
3024 header_decode_lwsp (&inptr);
3025 if (*inptr == '>') {
3026 inptr++;
3027 } else {
3028 w (g_warning ("Missing closing '>' on message id: %s", *in));
3029 }
3030 } else {
3031 w (g_warning ("Cannot find message id in: %s", *in));
3032 }
3033 } else {
3034 w (g_warning ("missing opening '<' on message id: %s", *in));
3035 }
3036 *in = inptr;
3037
3038 return msgid;
3039 }
3040
3041 /**
3042 * camel_header_msgid_decode:
3043 * @in: input string
3044 *
3045 * Extract a message-id token from @in.
3046 *
3047 * Returns: the msg-id
3048 **/
3049 gchar *
camel_header_msgid_decode(const gchar * in)3050 camel_header_msgid_decode (const gchar *in)
3051 {
3052 if (in == NULL)
3053 return NULL;
3054
3055 return header_msgid_decode_internal (&in);
3056 }
3057
3058 /**
3059 * camel_header_contentid_decode:
3060 * @in: input string
3061 *
3062 * Extract a content-id from @in.
3063 *
3064 * Returns: the extracted content-id
3065 **/
3066 gchar *
camel_header_contentid_decode(const gchar * in)3067 camel_header_contentid_decode (const gchar *in)
3068 {
3069 const gchar *inptr = in;
3070 gboolean at = FALSE;
3071 GString *addr;
3072 gchar *buf;
3073
3074 d (printf ("decoding Content-ID: '%s'\n", in));
3075
3076 header_decode_lwsp (&inptr);
3077
3078 /* some lame mailers quote the Content-Id */
3079 if (*inptr == '"')
3080 inptr++;
3081
3082 /* make sure the content-id is not "" which can happen if we get a
3083 * content-id such as <.@> (which Eudora likes to use...) */
3084 if ((buf = camel_header_msgid_decode (inptr)) != NULL && *buf)
3085 return buf;
3086
3087 g_free (buf);
3088
3089 /* ugh, not a valid msg-id - try to get something useful out of it then? */
3090 inptr = in;
3091 header_decode_lwsp (&inptr);
3092 if (*inptr == '<') {
3093 inptr++;
3094 header_decode_lwsp (&inptr);
3095 }
3096
3097 /* Eudora has been known to use <.@> as a content-id */
3098 if (!(buf = header_decode_word (&inptr)) && (*inptr == '\0' || !strchr (".@", *inptr)))
3099 return NULL;
3100
3101 addr = g_string_new ("");
3102 header_decode_lwsp (&inptr);
3103 while (buf != NULL || *inptr == '.' || (*inptr == '@' && !at)) {
3104 if (buf != NULL) {
3105 g_string_append (addr, buf);
3106 g_free (buf);
3107 buf = NULL;
3108 }
3109
3110 if (!at) {
3111 if (*inptr == '.') {
3112 g_string_append_c (addr, *inptr++);
3113 buf = header_decode_word (&inptr);
3114 } else if (*inptr == '@') {
3115 g_string_append_c (addr, *inptr++);
3116 buf = header_decode_word (&inptr);
3117 at = TRUE;
3118 }
3119 } else if (*inptr != '\0' && strchr (".[]", *inptr)) {
3120 g_string_append_c (addr, *inptr++);
3121 buf = header_decode_atom (&inptr);
3122 }
3123
3124 header_decode_lwsp (&inptr);
3125 }
3126
3127 return g_string_free (addr, FALSE);
3128 }
3129
3130 static void
header_references_decode_single(const gchar ** in,GSList ** list)3131 header_references_decode_single (const gchar **in, GSList **list)
3132 {
3133 const gchar *inptr = *in;
3134 GString *accum_word = NULL;
3135 gchar *id, *word;
3136
3137 while (*inptr) {
3138 header_decode_lwsp (&inptr);
3139 if (*inptr == '<') {
3140 id = header_msgid_decode_internal (&inptr);
3141 if (id) {
3142 *list = g_slist_prepend (*list, id);
3143 break;
3144 }
3145 } else {
3146 word = header_decode_word (&inptr);
3147 if (word) {
3148 /* To support broken clients, which do not enclose message IDs into angle brackets, as
3149 required in the RFC 2822: https://tools.ietf.org/html/rfc2822#section-3.6.4 */
3150 if (!*inptr || camel_mime_is_lwsp (*inptr)) {
3151 if (accum_word) {
3152 g_string_append (accum_word, word);
3153 *list = g_slist_prepend (*list, g_string_free (accum_word, FALSE));
3154 accum_word = NULL;
3155 } else {
3156 *list = g_slist_prepend (*list, word);
3157 word = NULL;
3158 }
3159 } else {
3160 if (accum_word)
3161 g_string_append (accum_word, word);
3162 else
3163 accum_word = g_string_new (word);
3164
3165 g_string_append_c (accum_word, *inptr);
3166 }
3167 g_free (word);
3168 } else if (*inptr != '\0')
3169 inptr++; /* Stupid mailer tricks */
3170 }
3171 }
3172
3173 if (accum_word)
3174 *list = g_slist_prepend (*list, g_string_free (accum_word, FALSE));
3175
3176 *in = inptr;
3177 }
3178
3179 /**
3180 * camel_header_references_decode:
3181 * @in: References header value
3182 *
3183 * Generate a list of references, from most recent up.
3184 *
3185 * Returns: (element-type utf8) (transfer full): a list of references decoedd from @in
3186 **/
3187 GSList *
camel_header_references_decode(const gchar * in)3188 camel_header_references_decode (const gchar *in)
3189 {
3190 GSList *refs = NULL;
3191
3192 if (in == NULL || in[0] == '\0')
3193 return NULL;
3194
3195 while (*in)
3196 header_references_decode_single (&in, &refs);
3197
3198 return refs;
3199 }
3200
3201 CamelHeaderAddress *
camel_header_mailbox_decode(const gchar * in,const gchar * charset)3202 camel_header_mailbox_decode (const gchar *in,
3203 const gchar *charset)
3204 {
3205 if (in == NULL)
3206 return NULL;
3207
3208 return header_decode_mailbox (&in, charset);
3209 }
3210
3211 CamelHeaderAddress *
camel_header_address_decode(const gchar * in,const gchar * charset)3212 camel_header_address_decode (const gchar *in,
3213 const gchar *charset)
3214 {
3215 const gchar *inptr = in, *last;
3216 CamelHeaderAddress *list = NULL, *addr;
3217
3218 d (printf ("decoding To: '%s'\n", in));
3219
3220 if (in == NULL)
3221 return NULL;
3222
3223 header_decode_lwsp (&inptr);
3224 if (*inptr == 0)
3225 return NULL;
3226
3227 do {
3228 last = inptr;
3229 addr = header_decode_address (&inptr, charset);
3230 if (addr)
3231 camel_header_address_list_append (&list, addr);
3232 header_decode_lwsp (&inptr);
3233 if (*inptr == ',')
3234 inptr++;
3235 else
3236 break;
3237 } while (inptr != last);
3238
3239 if (*inptr) {
3240 w (g_warning ("Invalid input detected at %c (%d): '%s'\n or at: '%s'", *inptr, (gint) (inptr - in), in, inptr));
3241 }
3242
3243 if (inptr == last) {
3244 w (g_warning ("detected invalid input loop at : '%s' for '%s'", last, in));
3245 }
3246
3247 return list;
3248 }
3249
3250 /**
3251 * camel_header_newsgroups_decode:
3252 * @in:
3253 *
3254 * Returns: (element-type utf8) (transfer full):
3255 **/
3256 GSList *
camel_header_newsgroups_decode(const gchar * in)3257 camel_header_newsgroups_decode (const gchar *in)
3258 {
3259 const gchar *inptr = in;
3260 register gchar c;
3261 GSList *list = NULL;
3262 const gchar *start;
3263
3264 do {
3265 header_decode_lwsp (&inptr);
3266 start = inptr;
3267 while ((c = *inptr++) && !camel_mime_is_lwsp (c) && c != ',')
3268 ;
3269 if (start != inptr - 1) {
3270 list = g_slist_prepend (list, g_strndup (start, inptr - start - 1));
3271 }
3272 } while (c);
3273
3274 return list;
3275 }
3276
/* Names of the transfer encodings, indexed by #CamelTransferEncoding
 * value; this must be kept in sync with the header */
static const gchar *encodings[] = {
	"",
	"7bit",
	"8bit",
	"base64",
	"quoted-printable",
	"binary",
	"x-uuencode",
};
3287
3288 const gchar *
camel_transfer_encoding_to_string(CamelTransferEncoding encoding)3289 camel_transfer_encoding_to_string (CamelTransferEncoding encoding)
3290 {
3291 if (encoding >= G_N_ELEMENTS (encodings))
3292 encoding = 0;
3293
3294 return encodings[encoding];
3295 }
3296
3297 CamelTransferEncoding
camel_transfer_encoding_from_string(const gchar * string)3298 camel_transfer_encoding_from_string (const gchar *string)
3299 {
3300 gint i;
3301
3302 if (string != NULL) {
3303 for (i = 0; i < G_N_ELEMENTS (encodings); i++)
3304 if (!g_ascii_strcasecmp (string, encodings[i]))
3305 return i;
3306
3307 if (!g_ascii_strcasecmp (string, "uuencode"))
3308 return CAMEL_TRANSFER_ENCODING_UUENCODE;
3309 }
3310
3311 return CAMEL_TRANSFER_ENCODING_DEFAULT;
3312 }
3313
3314 void
camel_header_mime_decode(const gchar * in,gint * maj,gint * min)3315 camel_header_mime_decode (const gchar *in,
3316 gint *maj,
3317 gint *min)
3318 {
3319 const gchar *inptr = in;
3320 gint major=-1, minor=-1;
3321
3322 d (printf ("decoding MIME-Version: '%s'\n", in));
3323
3324 if (in != NULL) {
3325 header_decode_lwsp (&inptr);
3326 if (isdigit (*inptr)) {
3327 major = camel_header_decode_int (&inptr);
3328 header_decode_lwsp (&inptr);
3329 if (*inptr == '.') {
3330 inptr++;
3331 header_decode_lwsp (&inptr);
3332 if (isdigit (*inptr))
3333 minor = camel_header_decode_int (&inptr);
3334 }
3335 }
3336 }
3337
3338 if (maj)
3339 *maj = major;
3340 if (min)
3341 *min = minor;
3342
3343 d (printf ("major = %d, minor = %d\n", major, minor));
3344 }
3345
/* One fragment of a parameter that was split RFC 2184 style
 * ("name*N=value"); header_decode_param_list() collects these and
 * joins them once the whole list has been read. */
struct _rfc2184_param {
	struct _camel_header_param param; /* name (text before the '*') and the fragment value */
	gint index; /* number parsed from the text after the '*' */
};
3350
3351 static gint
rfc2184_param_cmp(gconstpointer ap,gconstpointer bp)3352 rfc2184_param_cmp (gconstpointer ap,
3353 gconstpointer bp)
3354 {
3355 const struct _rfc2184_param *a = *(gpointer *) ap;
3356 const struct _rfc2184_param *b = *(gpointer *) bp;
3357 gint res;
3358
3359 res = strcmp (a->param.name, b->param.name);
3360 if (res == 0) {
3361 if (a->index > b->index)
3362 res = 1;
3363 else if (a->index < b->index)
3364 res = -1;
3365 }
3366
3367 return res;
3368 }
3369
3370 /* NB: Steals name and value */
3371 static struct _camel_header_param *
header_append_param(struct _camel_header_param * last,gchar * name,gchar * value)3372 header_append_param (struct _camel_header_param *last,
3373 gchar *name,
3374 gchar *value)
3375 {
3376 struct _camel_header_param *node;
3377
3378 /* This handles -
3379 * 8 bit data in parameters, illegal, tries to convert using locale, or just safens it up.
3380 * rfc2047 ecoded parameters, illegal, decodes them anyway. Some Outlook & Mozilla do this?
3381 */
3382 node = g_malloc (sizeof (*node));
3383 last->next = node;
3384 node->next = NULL;
3385 node->name = name;
3386 if (strncmp (value, "=?", 2) == 0
3387 && (node->value = header_decode_text (value, FALSE, NULL))) {
3388 g_free (value);
3389 } else if (g_ascii_strcasecmp (name, "boundary") != 0 && !g_utf8_validate (value, -1, NULL)) {
3390 const gchar *charset = camel_iconv_locale_charset ();
3391
3392 if ((node->value = header_convert ("UTF-8", charset ? charset:"ISO-8859-1", value, strlen (value)))) {
3393 g_free (value);
3394 } else {
3395 node->value = value;
3396 for (;*value; value++)
3397 if (!isascii ((guchar) * value))
3398 *value = '_';
3399 }
3400 } else
3401 node->value = value;
3402
3403 return node;
3404 }
3405
3406 static struct _camel_header_param *
header_decode_param_list(const gchar ** in)3407 header_decode_param_list (const gchar **in)
3408 {
3409 struct _camel_header_param *head = NULL, *last = (struct _camel_header_param *) &head;
3410 GPtrArray *split = NULL;
3411 const gchar *inptr = *in;
3412 struct _rfc2184_param *work;
3413 gchar *tmp;
3414
3415 /* Dump parameters into the output list, in the order found. RFC 2184 split parameters are kept in an array */
3416 header_decode_lwsp (&inptr);
3417 while (*inptr == ';') {
3418 gchar *name;
3419 gchar *value = NULL;
3420
3421 inptr++;
3422 name = decode_token (&inptr);
3423 header_decode_lwsp (&inptr);
3424 if (*inptr == '=') {
3425 inptr++;
3426 value = header_decode_value (&inptr);
3427 }
3428
3429 if (name && value) {
3430 gchar *index = strchr (name, '*');
3431
3432 if (index) {
3433 if (index[1] == 0) {
3434 /* VAL*="foo", decode immediately and append */
3435 *index = 0;
3436 tmp = rfc2184_decode (value, strlen (value));
3437 if (tmp) {
3438 g_free (value);
3439 value = tmp;
3440 }
3441 last = header_append_param (last, name, value);
3442 } else {
3443 /* VAL*1="foo", save for later */
3444 *index++ = 0;
3445 work = g_malloc (sizeof (*work));
3446 work->param.name = name;
3447 work->param.value = value;
3448 work->index = atoi (index);
3449 if (split == NULL)
3450 split = g_ptr_array_new ();
3451 g_ptr_array_add (split, work);
3452 }
3453 } else {
3454 last = header_append_param (last, name, value);
3455 }
3456 } else {
3457 g_free (name);
3458 g_free (value);
3459 }
3460
3461 header_decode_lwsp (&inptr);
3462 }
3463
3464 /* Rejoin any RFC 2184 split parameters in the proper order */
3465 /* Parameters with the same index will be concatenated in undefined order */
3466 if (split) {
3467 GString *value = g_string_new ("");
3468 struct _rfc2184_param *first;
3469 gint i;
3470
3471 qsort (split->pdata, split->len, sizeof (split->pdata[0]), rfc2184_param_cmp);
3472 first = split->pdata[0];
3473 for (i = 0; i < split->len; i++) {
3474 work = split->pdata[i];
3475 if (split->len - 1 == i)
3476 g_string_append (value, work->param.value);
3477 if (split->len - 1 == i || strcmp (work->param.name, first->param.name) != 0) {
3478 tmp = rfc2184_decode (value->str, value->len);
3479 if (tmp == NULL)
3480 tmp = g_strdup (value->str);
3481
3482 last = header_append_param (last, g_strdup (first->param.name), tmp);
3483 g_string_truncate (value, 0);
3484 first = work;
3485 }
3486 if (split->len - 1 != i)
3487 g_string_append (value, work->param.value);
3488 }
3489 g_string_free (value, TRUE);
3490 for (i = 0; i < split->len; i++) {
3491 work = split->pdata[i];
3492 g_free (work->param.name);
3493 g_free (work->param.value);
3494 g_free (work);
3495 }
3496 g_ptr_array_free (split, TRUE);
3497 }
3498
3499 *in = inptr;
3500
3501 return head;
3502 }
3503
3504 /**
3505 * camel_header_param_list_decode:
3506 * @in: (nullable): a header param value to decode
3507 *
3508 * Returns: (nullable) (transfer full): Decode list of parameters.
3509 * Free with camel_header_param_list_free() when done with it.
3510 **/
3511 struct _camel_header_param *
camel_header_param_list_decode(const gchar * in)3512 camel_header_param_list_decode (const gchar *in)
3513 {
3514 if (in == NULL)
3515 return NULL;
3516
3517 return header_decode_param_list (&in);
3518 }
3519
3520 static gchar *
header_encode_param(const guchar * in,gboolean * encoded,gboolean is_filename)3521 header_encode_param (const guchar *in,
3522 gboolean *encoded,
3523 gboolean is_filename)
3524 {
3525 const guchar *inptr = in;
3526 guchar *outbuf = NULL;
3527 const gchar *charset;
3528 GString *out;
3529 guint32 c;
3530
3531 *encoded = FALSE;
3532
3533 g_return_val_if_fail (in != NULL, NULL);
3534
3535 if (is_filename) {
3536 gchar *str;
3537 if (!g_utf8_validate ((gchar *) inptr, -1, NULL)) {
3538 GString *buff = g_string_new ("");
3539
3540 for (; inptr && *inptr; inptr++) {
3541 if (*inptr < 32)
3542 g_string_append_printf (buff, "%%%02X", (*inptr) & 0xFF);
3543 else
3544 g_string_append_c (buff, *inptr);
3545 }
3546
3547 outbuf = (guchar *) g_string_free (buff, FALSE);
3548 inptr = outbuf;
3549 }
3550
3551 /* do not set encoded flag for file names */
3552 str = header_encode_string_rfc2047 (inptr, TRUE);
3553 g_free (outbuf);
3554
3555 return str;
3556 }
3557
3558 /* if we have really broken utf8 passed in, we just treat it as binary data */
3559
3560 charset = camel_charset_best ((gchar *) in, strlen ((gchar *) in));
3561 if (charset == NULL) {
3562 return g_strdup ((gchar *) in);
3563 }
3564
3565 if (g_ascii_strcasecmp (charset, "UTF-8") != 0) {
3566 if ((outbuf = (guchar *) header_convert (charset, "UTF-8", (const gchar *) in, strlen ((gchar *) in))))
3567 inptr = outbuf;
3568 else
3569 return g_strdup ((gchar *) in);
3570 }
3571
3572 /* FIXME: set the 'language' as well, assuming we can get that info...? */
3573 out = g_string_new (charset);
3574 g_string_append (out, "''");
3575
3576 while ((c = *inptr++)) {
3577 if (camel_mime_is_attrchar (c))
3578 g_string_append_c (out, c);
3579 else
3580 g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
3581 }
3582 g_free (outbuf);
3583 *encoded = TRUE;
3584
3585 return g_string_free (out, FALSE);
3586 }
3587
3588 /* HACK: Set to non-zero when you want the 'filename' and 'name' headers encoded in RFC 2047 way,
3589 * otherwise they will be encoded in the correct RFC 2231 way. It's because Outlook and GMail
3590 * do not understand the correct standard and refuse attachments with localized name sent
3591 * from evolution. This seems to have been fixed in Exchange 2007 at least - not sure about
3592 * standalone Outlook. */
3593 gint camel_header_param_encode_filenames_in_rfc_2047 = 0;
3594
3595 void
camel_header_param_list_format_append(GString * out,struct _camel_header_param * p)3596 camel_header_param_list_format_append (GString *out,
3597 struct _camel_header_param *p)
3598 {
3599 gint used = out->len;
3600
3601 while (p) {
3602 gboolean is_filename = camel_header_param_encode_filenames_in_rfc_2047 && (g_ascii_strcasecmp (p->name, "filename") == 0 || g_ascii_strcasecmp (p->name, "name") == 0);
3603 gboolean encoded = FALSE;
3604 gboolean quote = FALSE;
3605 gint here = out->len;
3606 gsize nlen, vlen;
3607 gchar *value;
3608
3609 if (!p->value) {
3610 p = p->next;
3611 continue;
3612 }
3613
3614 value = header_encode_param ((guchar *) p->value, &encoded, is_filename);
3615 if (!value) {
3616 w (g_warning ("appending parameter %s=%s violates rfc2184", p->name, p->value));
3617 value = g_strdup (p->value);
3618 }
3619
3620 if (!encoded) {
3621 gchar *ch;
3622
3623 for (ch = value; ch && *ch; ch++) {
3624 if (camel_mime_is_tspecial (*ch) || camel_mime_is_lwsp (*ch))
3625 break;
3626 }
3627
3628 quote = ch && *ch;
3629 }
3630
3631 quote = quote || is_filename;
3632 nlen = strlen (p->name);
3633 vlen = strlen (value);
3634
3635 /* do not fold file names */
3636 if (!is_filename && used + nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3637 g_string_append (out, ";\n\t");
3638 here = out->len;
3639 used = 0;
3640 } else
3641 g_string_append (out, "; ");
3642
3643 if (!is_filename && nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3644 /* we need to do special rfc2184 parameter wrapping */
3645 gint maxlen = CAMEL_FOLD_SIZE - (nlen + 8);
3646 gchar *inptr, *inend;
3647 gint i = 0;
3648
3649 inptr = value;
3650 inend = value + vlen;
3651
3652 while (inptr < inend) {
3653 gchar *ptr = inptr + MIN (inend - inptr, maxlen);
3654
3655 if (encoded && ptr < inend) {
3656 /* be careful not to break an encoded gchar (ie %20) */
3657 gchar *q = ptr;
3658 gint j = 2;
3659
3660 for (; j > 0 && q > inptr && *q != '%'; j--, q--);
3661 if (*q == '%')
3662 ptr = q;
3663 }
3664
3665 if (i != 0) {
3666 g_string_append (out, ";\n\t");
3667 here = out->len;
3668 used = 0;
3669 }
3670
3671 g_string_append_printf (out, "%s*%d%s=", p->name, i++, encoded ? "*" : "");
3672 if (encoded || !quote)
3673 g_string_append_len (out, inptr, ptr - inptr);
3674 else
3675 quote_word (out, TRUE, inptr, ptr - inptr);
3676
3677 d (printf ("wrote: %s\n", out->str + here));
3678
3679 used += (out->len - here);
3680
3681 inptr = ptr;
3682 }
3683 } else {
3684 g_string_append_printf (out, "%s%s=", p->name, encoded ? "*" : "");
3685
3686 /* Quote even if we don't need to in order to
3687 * work around broken mail software like the
3688 * Jive Forums' NNTP gateway */
3689 if (encoded /*|| !quote */)
3690 g_string_append (out, value);
3691 else
3692 quote_word (out, TRUE, value, vlen);
3693
3694 used += (out->len - here);
3695 }
3696
3697 g_free (value);
3698
3699 p = p->next;
3700 }
3701 }
3702
3703 gchar *
camel_header_param_list_format(struct _camel_header_param * p)3704 camel_header_param_list_format (struct _camel_header_param *p)
3705 {
3706 GString *out = g_string_new ("");
3707
3708 camel_header_param_list_format_append (out, p);
3709 return g_string_free (out, FALSE);
3710 }
3711
3712 CamelContentType *
camel_content_type_decode(const gchar * in)3713 camel_content_type_decode (const gchar *in)
3714 {
3715 const gchar *inptr = in;
3716 gchar *type, *subtype = NULL;
3717 CamelContentType *t = NULL;
3718
3719 if (in == NULL)
3720 return NULL;
3721
3722 type = decode_token (&inptr);
3723 header_decode_lwsp (&inptr);
3724 if (type) {
3725 if (*inptr == '/') {
3726 inptr++;
3727 subtype = decode_token (&inptr);
3728 }
3729 if (subtype == NULL && (!g_ascii_strcasecmp (type, "text"))) {
3730 w (g_warning ("text type with no subtype, resorting to text/plain: %s", in));
3731 subtype = g_strdup ("plain");
3732 }
3733 if (subtype == NULL) {
3734 w (g_warning ("MIME type with no subtype: %s", in));
3735 }
3736
3737 t = camel_content_type_new (type, subtype);
3738 t->params = header_decode_param_list (&inptr);
3739 g_free (type);
3740 g_free (subtype);
3741 } else {
3742 g_free (type);
3743 d (printf ("cannot find MIME type in header (2) '%s'", in));
3744 }
3745 return t;
3746 }
3747
3748 void
camel_content_type_dump(CamelContentType * ct)3749 camel_content_type_dump (CamelContentType *ct)
3750 {
3751 struct _camel_header_param *p;
3752
3753 printf ("Content-Type: ");
3754 if (ct == NULL) {
3755 printf ("<NULL>\n");
3756 return;
3757 }
3758 printf ("%s / %s", ct->type, ct->subtype);
3759 p = ct->params;
3760 if (p) {
3761 while (p) {
3762 printf (";\n\t%s=\"%s\"", p->name, p->value);
3763 p = p->next;
3764 }
3765 }
3766 printf ("\n");
3767 }
3768
3769 gchar *
camel_content_type_format(CamelContentType * ct)3770 camel_content_type_format (CamelContentType *ct)
3771 {
3772 GString *out;
3773
3774 if (ct == NULL)
3775 return NULL;
3776
3777 out = g_string_new ("");
3778 if (ct->type == NULL) {
3779 g_string_append_printf (out, "text/plain");
3780 w (g_warning ("Content-Type with no main type"));
3781 } else if (ct->subtype == NULL) {
3782 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3783 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3784 g_string_append_printf (out, "%s/mixed", ct->type);
3785 else
3786 g_string_append_printf (out, "%s", ct->type);
3787 } else {
3788 g_string_append_printf (out, "%s/%s", ct->type, ct->subtype);
3789 }
3790 camel_header_param_list_format_append (out, ct->params);
3791
3792 return g_string_free (out, FALSE);
3793 }
3794
3795 gchar *
camel_content_type_simple(CamelContentType * ct)3796 camel_content_type_simple (CamelContentType *ct)
3797 {
3798 if (!ct)
3799 return NULL;
3800
3801 if (ct->type == NULL) {
3802 w (g_warning ("Content-Type with no main type"));
3803 return g_strdup ("text/plain");
3804 } else if (ct->subtype == NULL) {
3805 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3806 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3807 return g_strdup_printf ("%s/mixed", ct->type);
3808 else
3809 return g_strdup (ct->type);
3810 } else
3811 return g_strdup_printf ("%s/%s", ct->type, ct->subtype);
3812 }
3813
3814 gchar *
camel_content_transfer_encoding_decode(const gchar * in)3815 camel_content_transfer_encoding_decode (const gchar *in)
3816 {
3817 if (in)
3818 return decode_token (&in);
3819
3820 return NULL;
3821 }
3822
3823 CamelContentDisposition *
camel_content_disposition_new(void)3824 camel_content_disposition_new (void)
3825 {
3826 CamelContentDisposition *dd;
3827
3828 dd = g_malloc0 (sizeof (CamelContentDisposition));
3829 dd->refcount = 1;
3830 dd->disposition = NULL;
3831 dd->params = NULL;
3832
3833 return dd;
3834 }
3835
3836 CamelContentDisposition *
camel_content_disposition_decode(const gchar * in)3837 camel_content_disposition_decode (const gchar *in)
3838 {
3839 CamelContentDisposition *d = NULL;
3840 const gchar *inptr = in;
3841
3842 if (in == NULL)
3843 return NULL;
3844
3845 d = camel_content_disposition_new ();
3846 d->disposition = decode_token (&inptr);
3847 if (d->disposition == NULL) {
3848 w (g_warning ("Empty disposition type"));
3849 }
3850 d->params = header_decode_param_list (&inptr);
3851 return d;
3852 }
3853
3854 CamelContentDisposition *
camel_content_disposition_ref(CamelContentDisposition * d)3855 camel_content_disposition_ref (CamelContentDisposition *d)
3856 {
3857 if (d)
3858 d->refcount++;
3859
3860 return d;
3861 }
3862
3863 void
camel_content_disposition_unref(CamelContentDisposition * d)3864 camel_content_disposition_unref (CamelContentDisposition *d)
3865 {
3866 if (d) {
3867 if (d->refcount <= 1) {
3868 camel_header_param_list_free (d->params);
3869 g_free (d->disposition);
3870 g_free (d);
3871 } else {
3872 d->refcount--;
3873 }
3874 }
3875 }
3876
3877 gchar *
camel_content_disposition_format(CamelContentDisposition * d)3878 camel_content_disposition_format (CamelContentDisposition *d)
3879 {
3880 GString *out;
3881
3882 if (d == NULL)
3883 return NULL;
3884
3885 out = g_string_new (d->disposition ? d->disposition : "attachment");
3886 camel_header_param_list_format_append (out, d->params);
3887
3888 return g_string_free (out, FALSE);
3889 }
3890
3891 gboolean
camel_content_disposition_is_attachment(const CamelContentDisposition * disposition,const CamelContentType * content_type)3892 camel_content_disposition_is_attachment (const CamelContentDisposition *disposition,
3893 const CamelContentType *content_type)
3894 {
3895 return camel_content_disposition_is_attachment_ex (disposition, content_type, NULL);
3896 }
3897
3898 gboolean
camel_content_disposition_is_attachment_ex(const CamelContentDisposition * disposition,const CamelContentType * content_type,const CamelContentType * parent_content_type)3899 camel_content_disposition_is_attachment_ex (const CamelContentDisposition *disposition,
3900 const CamelContentType *content_type,
3901 const CamelContentType *parent_content_type)
3902 {
3903 if (content_type && (
3904 camel_content_type_is (content_type, "application", "pkcs7-mime") ||
3905 camel_content_type_is (content_type, "application", "xpkcs7mime") ||
3906 camel_content_type_is (content_type, "application", "xpkcs7-mime") ||
3907 camel_content_type_is (content_type, "application", "x-pkcs7-mime")))
3908 return FALSE;
3909
3910 if (content_type && (
3911 camel_content_type_is (content_type, "application", "pgp-encrypted")))
3912 return !parent_content_type || !camel_content_type_is (parent_content_type, "multipart", "encrypted");
3913
3914 if (content_type && camel_content_type_is (content_type, "application", "octet-stream") &&
3915 parent_content_type && camel_content_type_is (parent_content_type, "multipart", "encrypted"))
3916 return FALSE;
3917
3918 if (content_type && (
3919 camel_content_type_is (content_type, "application", "pkcs7-signature") ||
3920 camel_content_type_is (content_type, "application", "xpkcs7signature") ||
3921 camel_content_type_is (content_type, "application", "xpkcs7-signature") ||
3922 camel_content_type_is (content_type, "application", "x-pkcs7-signature") ||
3923 camel_content_type_is (content_type, "application", "pgp-signature")))
3924 return !parent_content_type || !camel_content_type_is (parent_content_type, "multipart", "signed");
3925
3926 if (parent_content_type && content_type && camel_content_type_is (content_type, "message", "rfc822"))
3927 return TRUE;
3928
3929 if (!disposition)
3930 return FALSE;
3931
3932 if (disposition->disposition && g_ascii_strcasecmp (disposition->disposition, "attachment") == 0)
3933 return TRUE;
3934
3935 /* If the Content-Disposition isn't an attachment, then call everything with a "filename"
3936 parameter an attachment, but only if there is no Content-Disposition header, or it's
3937 not the "inline" or it's neither text/... nor image/... Content-Type, which can be usually
3938 shown in the UI inline.
3939
3940 The test for Content-Type was added for Apple Mail, which marks also for example .pdf
3941 attachments as 'inline', which broke the previous logic here.
3942 */
3943 if (!disposition->disposition ||
3944 g_ascii_strcasecmp (disposition->disposition, "inline") != 0 ||
3945 (content_type && !camel_content_type_is (content_type, "text", "*") && !camel_content_type_is (content_type, "image", "*"))) {
3946 const struct _camel_header_param *param;
3947
3948 for (param = disposition->params; param; param = param->next) {
3949 if (param->name && param->value && *param->value && g_ascii_strcasecmp (param->name, "filename") == 0)
3950 return TRUE;
3951 }
3952 }
3953
3954 return FALSE;
3955 }
3956
/* date parser macros */

/* Characters which may occur in each kind of date token.
 * NOTE(review): presumably the sets camel_datetok_table was generated
 * from - confirm before changing either of them. */
#define NUMERIC_CHARS          "1234567890"
#define WEEKDAY_CHARS          "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
#define MONTH_CHARS            "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
#define TIMEZONE_ALPHA_CHARS   "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
#define TIME_CHARS             "1234567890:"

/* Bits of a date token mask. A token counts as a member of a class
 * only while the matching NON_* bit is clear; the two HAS_* bits are
 * additionally required for time and numeric time zone tokens. */
#define DATE_TOKEN_NON_NUMERIC          (1 << 0)
#define DATE_TOKEN_NON_WEEKDAY          (1 << 1)
#define DATE_TOKEN_NON_MONTH            (1 << 2)
#define DATE_TOKEN_NON_TIME             (1 << 3)
#define DATE_TOKEN_HAS_COLON            (1 << 4)
#define DATE_TOKEN_NON_TIMEZONE_ALPHA   (1 << 5)
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN             (1 << 7)
3973
3974 static guchar camel_datetok_table[256] = {
3975 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3976 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3977 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
3978 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
3979 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
3980 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
3981 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
3982 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
3983 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3984 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3985 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3986 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3987 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3988 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3989 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3990 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3991 };
3992
3993 static struct {
3994 const gchar *name;
3995 gint offset;
3996 } tz_offsets[] = {
3997 { "UT", 0 },
3998 { "GMT", 0 },
3999 { "EST", -500 }, /* these are all US timezones. bloody yanks */
4000 { "EDT", -400 },
4001 { "CST", -600 },
4002 { "CDT", -500 },
4003 { "MST", -700 },
4004 { "MDT", -600 },
4005 { "PST", -800 },
4006 { "PDT", -700 },
4007 { "Z", 0 },
4008 { "A", -100 },
4009 { "M", -1200 },
4010 { "N", 100 },
4011 { "Y", 1200 },
4012 };
4013
4014 static const gchar tm_months[][4] = {
4015 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
4016 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
4017 };
4018
4019 static const gchar tm_days[][4] = {
4020 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
4021 };
4022
4023 /**
4024 * camel_header_format_date:
4025 * @date: time_t date representation
4026 * @tz_offset: Timezone offset
4027 *
4028 * Allocates a string buffer containing the rfc822 formatted date
4029 * string represented by @time and @tz_offset.
4030 *
4031 * Returns: a valid string representation of the date.
4032 **/
4033 gchar *
camel_header_format_date(time_t date,gint tz_offset)4034 camel_header_format_date (time_t date,
4035 gint tz_offset)
4036 {
4037 struct tm tm;
4038
4039 d (printf ("offset = %d\n", tz_offset));
4040
4041 d (printf ("converting date %s", ctime (&date)));
4042
4043 date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
4044
4045 d (printf ("converting date %s", ctime (&date)));
4046
4047 gmtime_r (&date, &tm);
4048
4049 return g_strdup_printf (
4050 "%s, %02d %s %04d %02d:%02d:%02d %+05d",
4051 tm_days[tm.tm_wday],
4052 tm.tm_mday,
4053 tm_months[tm.tm_mon],
4054 tm.tm_year + 1900,
4055 tm.tm_hour,
4056 tm.tm_min,
4057 tm.tm_sec,
4058 tz_offset);
4059 }
4060
4061 /* This is where it gets ugly... */
4062
4063 struct _date_token {
4064 struct _date_token *next;
4065 guchar mask;
4066 const gchar *start;
4067 gsize len;
4068 };
4069
4070 static struct _date_token *
datetok(const gchar * date)4071 datetok (const gchar *date)
4072 {
4073 struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens;
4074 const gchar *start, *end;
4075 guchar mask;
4076
4077 start = date;
4078 while (*start) {
4079 /* kill leading whitespace */
4080 while (*start == ' ' || *start == '\t')
4081 start++;
4082
4083 if (*start == '\0')
4084 break;
4085
4086 mask = camel_datetok_table[(guchar) *start];
4087
4088 /* find the end of this token */
4089 end = start + 1;
4090 while (*end && !strchr ("-/,\t\r\n ", *end))
4091 mask |= camel_datetok_table[(guchar) *end++];
4092
4093 if (end != start) {
4094 token = g_malloc (sizeof (struct _date_token));
4095 token->next = NULL;
4096 token->start = start;
4097 token->len = end - start;
4098 token->mask = mask;
4099
4100 tail->next = token;
4101 tail = token;
4102 }
4103
4104 if (*end)
4105 start = end + 1;
4106 else
4107 break;
4108 }
4109
4110 return tokens;
4111 }
4112
4113 static gint
decode_int(const gchar * in,gsize inlen)4114 decode_int (const gchar *in,
4115 gsize inlen)
4116 {
4117 register const gchar *inptr;
4118 gint sign = 1, val = 0;
4119 const gchar *inend;
4120
4121 inptr = in;
4122 inend = in + inlen;
4123
4124 if (*inptr == '-') {
4125 sign = -1;
4126 inptr++;
4127 } else if (*inptr == '+')
4128 inptr++;
4129
4130 for (; inptr < inend; inptr++) {
4131 if (!(*inptr >= '0' && *inptr <= '9'))
4132 return -1;
4133 else
4134 val = (val * 10) + (*inptr - '0');
4135 }
4136
4137 val *= sign;
4138
4139 return val;
4140 }
4141
#if 0
/* Returns the number of days of @month (1 to 12) in @year, or 0 for
 * any other @month. */
static gint
get_days_in_month (gint month,
                   gint year)
{
	static const gint month_days[] = {
		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
	};

	if (month < 1 || month > 12)
		return 0;

	if (month == 2 && g_date_is_leap_year (year))
		return 29;

	return month_days[month - 1];
}
#endif
4171
4172 static gint
get_wday(const gchar * in,gsize inlen)4173 get_wday (const gchar *in,
4174 gsize inlen)
4175 {
4176 gint wday;
4177
4178 g_return_val_if_fail (in != NULL, -1);
4179
4180 if (inlen < 3)
4181 return -1;
4182
4183 for (wday = 0; wday < 7; wday++) {
4184 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
4185 return wday;
4186 }
4187
4188 return -1; /* unknown week day */
4189 }
4190
4191 static gint
get_mday(const gchar * in,gsize inlen)4192 get_mday (const gchar *in,
4193 gsize inlen)
4194 {
4195 gint mday;
4196
4197 g_return_val_if_fail (in != NULL, -1);
4198
4199 mday = decode_int (in, inlen);
4200
4201 if (mday < 0 || mday > 31)
4202 mday = -1;
4203
4204 return mday;
4205 }
4206
4207 static gint
get_month(const gchar * in,gsize inlen)4208 get_month (const gchar *in,
4209 gsize inlen)
4210 {
4211 gint i;
4212
4213 g_return_val_if_fail (in != NULL, -1);
4214
4215 if (inlen < 3)
4216 return -1;
4217
4218 for (i = 0; i < 12; i++) {
4219 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
4220 return i;
4221 }
4222
4223 return -1; /* unknown month */
4224 }
4225
4226 static gint
get_year(const gchar * in,gsize inlen)4227 get_year (const gchar *in,
4228 gsize inlen)
4229 {
4230 gint year;
4231
4232 g_return_val_if_fail (in != NULL, -1);
4233
4234 if ((year = decode_int (in, inlen)) == -1)
4235 return -1;
4236
4237 if (year < 100)
4238 year += (year < 70) ? 2000 : 1900;
4239
4240 if (year < 1969)
4241 return -1;
4242
4243 return year;
4244 }
4245
4246 static gboolean
get_time(const gchar * in,gsize inlen,gint * hour,gint * min,gint * sec)4247 get_time (const gchar *in,
4248 gsize inlen,
4249 gint *hour,
4250 gint *min,
4251 gint *sec)
4252 {
4253 register const gchar *inptr;
4254 gint *val, colons = 0;
4255 const gchar *inend;
4256
4257 *hour = *min = *sec = 0;
4258
4259 inend = in + inlen;
4260 val = hour;
4261 for (inptr = in; inptr < inend; inptr++) {
4262 if (*inptr == ':') {
4263 colons++;
4264 switch (colons) {
4265 case 1:
4266 val = min;
4267 break;
4268 case 2:
4269 val = sec;
4270 break;
4271 default:
4272 return FALSE;
4273 }
4274 } else if (!(*inptr >= '0' && *inptr <= '9'))
4275 return FALSE;
4276 else
4277 *val = (*val * 10) + (*inptr - '0');
4278 }
4279
4280 return TRUE;
4281 }
4282
4283 static gint
get_tzone(struct _date_token ** token)4284 get_tzone (struct _date_token **token)
4285 {
4286 const gchar *inptr, *inend;
4287 gsize inlen;
4288 gint i, t;
4289
4290 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
4291 inptr = (*token)->start;
4292 inlen = (*token)->len;
4293 inend = inptr + inlen;
4294
4295 if (*inptr == '+' || *inptr == '-') {
4296 return decode_int (inptr, inlen);
4297 } else {
4298 if (*inptr == '(') {
4299 inptr++;
4300 if (*(inend - 1) == ')')
4301 inlen -= 2;
4302 else
4303 inlen--;
4304 }
4305
4306 for (t = 0; t < 15; t++) {
4307 gsize len = strlen (tz_offsets[t].name);
4308
4309 if (len != inlen)
4310 continue;
4311
4312 if (!strncmp (inptr, tz_offsets[t].name, len))
4313 return tz_offsets[t].offset;
4314 }
4315 }
4316 }
4317
4318 return -1;
4319 }
4320
4321 static time_t
parse_rfc822_date(struct _date_token * tokens,gint * tzone)4322 parse_rfc822_date (struct _date_token *tokens,
4323 gint *tzone)
4324 {
4325 gint hour, min, sec, offset, n;
4326 struct _date_token *token;
4327 struct tm tm;
4328 time_t t;
4329
4330 g_return_val_if_fail (tokens != NULL, (time_t) 0);
4331
4332 token = tokens;
4333
4334 memset ((gpointer) &tm, 0, sizeof (struct tm));
4335
4336 if ((n = get_wday (token->start, token->len)) != -1) {
4337 /* not all dates may have this... */
4338 tm.tm_wday = n;
4339 token = token->next;
4340 }
4341
4342 /* get the mday */
4343 if (!token || (n = get_mday (token->start, token->len)) == -1)
4344 return (time_t) 0;
4345
4346 tm.tm_mday = n;
4347 token = token->next;
4348
4349 /* get the month */
4350 if (!token || (n = get_month (token->start, token->len)) == -1)
4351 return (time_t) 0;
4352
4353 tm.tm_mon = n;
4354 token = token->next;
4355
4356 /* get the year */
4357 if (!token || (n = get_year (token->start, token->len)) == -1)
4358 return (time_t) 0;
4359
4360 tm.tm_year = n - 1900;
4361 token = token->next;
4362
4363 /* get the hour/min/sec */
4364 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
4365 return (time_t) 0;
4366
4367 tm.tm_hour = hour;
4368 tm.tm_min = min;
4369 tm.tm_sec = sec;
4370 token = token->next;
4371
4372 if (token && token->start && (
4373 g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4374 g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4375 /* not a valid RFC 822 time representation */
4376 return 0;
4377 }
4378
4379 /* get the timezone */
4380 if (!token || (n = get_tzone (&token)) == -1) {
4381 /* I guess we assume tz is GMT? */
4382 offset = 0;
4383 } else {
4384 offset = n;
4385 }
4386
4387 t = camel_mktime_utc (&tm);
4388
4389 /* t is now GMT of the time we want, but not offset by the timezone ... */
4390
4391 /* this should convert the time to the GMT equiv time */
4392 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4393
4394 if (tzone)
4395 *tzone = offset;
4396
4397 return t;
4398 }
4399
/* Classification of a date token by its mask. @t is a pointer to a
 * struct _date_token; it is parenthesised before the cast, so that any
 * pointer expression can be passed. */
#define date_token_mask(t) (((struct _date_token *) (t))->mask)
#define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
#define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
#define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
/* a time token needs a colon on top of holding time characters only */
#define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
#define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
/* a numeric time zone needs a sign on top of holding zone digits only */
#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
#define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
4408
4409 static time_t
parse_broken_date(struct _date_token * tokens,gint * tzone)4410 parse_broken_date (struct _date_token *tokens,
4411 gint *tzone)
4412 {
4413 gboolean got_wday, got_month, got_tzone, is_pm;
4414 gint hour, min, sec, offset, n;
4415 struct _date_token *token;
4416 struct tm tm;
4417 time_t t;
4418
4419 memset ((gpointer) &tm, 0, sizeof (struct tm));
4420 got_wday = got_month = got_tzone = FALSE;
4421 is_pm = FALSE;
4422 offset = 0;
4423
4424 token = tokens;
4425 while (token) {
4426 if (is_weekday (token) && !got_wday) {
4427 if ((n = get_wday (token->start, token->len)) != -1) {
4428 d (printf ("weekday; "));
4429 got_wday = TRUE;
4430 tm.tm_wday = n;
4431 goto next;
4432 }
4433 }
4434
4435 if (is_month (token) && !got_month) {
4436 if ((n = get_month (token->start, token->len)) != -1) {
4437 d (printf ("month; "));
4438 got_month = TRUE;
4439 tm.tm_mon = n;
4440 goto next;
4441 }
4442 }
4443
4444 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
4445 if (get_time (token->start, token->len, &hour, &min, &sec)) {
4446 d (printf ("time; "));
4447 tm.tm_hour = hour;
4448 tm.tm_min = min;
4449 tm.tm_sec = sec;
4450 goto next;
4451 }
4452 }
4453
4454 if (!got_tzone && token->start && (
4455 g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4456 g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4457 is_pm = g_ascii_strncasecmp (token->start, "PM", 2) == 0;
4458
4459 goto next;
4460 }
4461
4462 if (is_tzone (token) && !got_tzone) {
4463 struct _date_token *t = token;
4464
4465 if ((n = get_tzone (&t)) != -1) {
4466 d (printf ("tzone; "));
4467 got_tzone = TRUE;
4468 offset = n;
4469 goto next;
4470 }
4471 }
4472
4473 if (is_numeric (token)) {
4474 if (token->len == 4 && !tm.tm_year) {
4475 if ((n = get_year (token->start, token->len)) != -1) {
4476 d (printf ("year; "));
4477 tm.tm_year = n - 1900;
4478 goto next;
4479 }
4480 } else {
4481 /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
4482 if (!got_month && token->next && is_numeric (token->next)) {
4483 if ((n = decode_int (token->start, token->len)) > 12) {
4484 goto mday;
4485 } else if (n > 0) {
4486 d (printf ("mon; "));
4487 got_month = TRUE;
4488 tm.tm_mon = n - 1;
4489 }
4490 goto next;
4491 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
4492 mday:
4493 d (printf ("mday; "));
4494 tm.tm_mday = n;
4495 goto next;
4496 } else if (!tm.tm_year) {
4497 if ((n = get_year (token->start, token->len)) != -1) {
4498 d (printf ("2-digit year; "));
4499 tm.tm_year = n - 1900;
4500 }
4501 goto next;
4502 }
4503 }
4504 }
4505
4506 d (printf ("???; "));
4507
4508 next:
4509
4510 token = token->next;
4511 }
4512
4513 d (printf ("\n"));
4514
4515 t = camel_mktime_utc (&tm);
4516
4517 /* t is now GMT of the time we want, but not offset by the timezone ... */
4518
4519 /* this should convert the time to the GMT equiv time */
4520 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4521
4522 if (is_pm)
4523 t += 12 * 60 * 60;
4524
4525 if (tzone)
4526 *tzone = offset;
4527
4528 return t;
4529 }
4530
4531 /**
4532 * camel_header_decode_date:
4533 * @str: input date string
4534 * @tz_offset: timezone offset
4535 *
4536 * Decodes the rfc822 date string and saves the GMT offset into
4537 * @tz_offset if non-NULL.
4538 *
4539 * Returns: the time_t representation of the date string specified by
4540 * @str or (time_t) 0 on error. If @tz_offset is non-NULL, the value
4541 * of the timezone offset will be stored.
4542 **/
4543 time_t
camel_header_decode_date(const gchar * str,gint * tz_offset)4544 camel_header_decode_date (const gchar *str,
4545 gint *tz_offset)
4546 {
4547 struct _date_token *token, *tokens;
4548 time_t date;
4549
4550 if (!str || !(tokens = datetok (str))) {
4551 if (tz_offset)
4552 *tz_offset = 0;
4553
4554 return (time_t) 0;
4555 }
4556
4557 if (!(date = parse_rfc822_date (tokens, tz_offset)))
4558 date = parse_broken_date (tokens, tz_offset);
4559
4560 /* cleanup */
4561 while (tokens) {
4562 token = tokens;
4563 tokens = tokens->next;
4564 g_free (token);
4565 }
4566
4567 return date;
4568 }
4569
4570 gchar *
camel_header_location_decode(const gchar * in)4571 camel_header_location_decode (const gchar *in)
4572 {
4573 gint quote = 0;
4574 GString *out = g_string_new ("");
4575 gchar c;
4576
4577 /* Sigh. RFC2557 says:
4578 * content-location = "Content-Location:" [CFWS] URI [CFWS]
4579 * where URI is restricted to the syntax for URLs as
4580 * defined in Uniform Resource Locators [URL] until
4581 * IETF specifies other kinds of URIs.
4582 *
4583 * But Netscape puts quotes around the URI when sending web
4584 * pages.
4585 *
4586 * Which is required as defined in rfc2017 [3.1]. Although
4587 * outlook doesn't do this.
4588 *
4589 * Since we get headers already unfolded, we need just drop
4590 * all whitespace. URL's cannot contain whitespace or quoted
4591 * characters, even when included in quotes.
4592 */
4593
4594 header_decode_lwsp (&in);
4595 if (*in == '"') {
4596 in++;
4597 quote = 1;
4598 }
4599
4600 while ((c = *in++)) {
4601 if (quote && c == '"')
4602 break;
4603 if (!camel_mime_is_lwsp (c))
4604 g_string_append_c (out, c);
4605 }
4606
4607 return g_string_free (out, FALSE);
4608 }
4609
/**
 * camel_header_msgid_generate:
 * @domain: domain to use (like "example.com") for the ID suffix; can be NULL
 *
 * Either the @domain is used, or the user's local hostname,
 * in case it's NULL or empty. The local hostname is looked up on
 * first need and then cached for later calls.
 *
 * The part before the '@' is the SHA-1 checksum of the current times,
 * process related IDs, a call counter and host and user names,
 * followed by ".camel".
 *
 * Returns: Unique message ID, as a newly allocated string.
 **/
gchar *
camel_header_msgid_generate (const gchar *domain)
{
	static GMutex count_lock;
#define LOOKUP_LOCK() g_mutex_lock (&count_lock)
#define LOOKUP_UNLOCK() g_mutex_unlock (&count_lock)
	static volatile gint counter = 0;
	static gchar *cached_hostname = NULL;
	struct addrinfo *ai = NULL;
	GChecksum *checksum;
	gchar *msgid;

	/* An empty domain means "use the local hostname", just like NULL.
	 * This has to be decided independently of the cache state, else
	 * an empty domain would end up in the ID once the hostname has
	 * been cached. */
	if (domain && !*domain)
		domain = NULL;

	LOOKUP_LOCK ();
	if (!cached_hostname && !domain) {
		gchar host[MAXHOSTNAMELEN];
		struct addrinfo hints = { 0 };
		const gchar *name;
		gint retval;

		retval = gethostname (host, sizeof (host));
		if (retval == 0 && *host) {
			/* prefer the canonical name, when it can be resolved */
			hints.ai_flags = AI_CANONNAME;
			ai = camel_getaddrinfo (
				host, NULL, &hints, NULL, NULL);
			if (ai && ai->ai_canonname)
				name = ai->ai_canonname;
			else
				name = host;
		} else
			name = "localhost.localdomain";

		cached_hostname = g_strdup (name);
	}

	checksum = g_checksum_new (G_CHECKSUM_SHA1);

	#define add_i64(_x) G_STMT_START { \
		gint64 i64 = (_x); \
		g_checksum_update (checksum, (const guchar *) &i64, sizeof (gint64)); \
	} G_STMT_END

	#define add_str(_x, _def) G_STMT_START { \
		const gchar *str = (_x); \
		if (!str) \
			str = (_def); \
		g_checksum_update (checksum, (const guchar *) str, strlen (str)); \
	} G_STMT_END

	add_i64 (g_get_monotonic_time ());
	add_i64 (g_get_real_time ());
	add_i64 (getpid ());
	add_i64 (getgid ());
	add_i64 (getppid ());
	add_i64 (g_atomic_int_add (&counter, 1));

	add_str (domain, "localhost");
	add_str (cached_hostname, "localhost");
	add_str (g_get_host_name (), "localhost");
	add_str (g_get_user_name (), "user");
	add_str (g_get_real_name (), "User");

	#undef add_i64
	#undef add_str

	/* cached_hostname is always set by now when domain is NULL */
	msgid = g_strdup_printf ("%s.camel@%s", g_checksum_get_string (checksum), domain ? domain : cached_hostname);

	g_checksum_free (checksum);

	LOOKUP_UNLOCK ();

	/* the canonical name has been copied into the cache above */
	if (ai)
		camel_freeaddrinfo (ai);

	return msgid;
}
4696
/*
 * Table of headers that may identify the mailing list a message belongs to.
 *
 * Entries are tried in array order by camel_headers_dup_mailing_list() and
 * the first matching entry wins, so more specific headers come first.
 * Every pattern has two capture groups: group 1 is the list name and
 * group 2 the (possibly empty) domain part.
 */
static struct {
	const gchar *name;	/* header name to look up */
	const gchar *pattern;	/* regular expression applied to the header value */
	regex_t regex;		/* compiled form of pattern, filled in by mailing_list_init() */
} mail_list_magic[] = {
	/* List-Post: <mailto:gnome-hackers@gnome.org> */
	/* List-Post: <mailto:gnome-hackers> */
	{ "List-Post", "[ \t]*<mailto:([^@>]+)@?([^ \n\t\r>]*)" },
	/* List-Id: GNOME stuff <gnome-hackers.gnome.org> */
	/* List-Id: <gnome-hackers.gnome.org> */
	/* List-Id: <gnome-hackers> */
	/* This old one wasn't very useful: { "List-Id", " *([^<]+)" },*/
	{ "List-Id", "[^<]*<([^\\.>]+)\\.?([^ \n\t\r>]*)" },
	/* Mailing-List: list gnome-hackers@gnome.org; contact gnome-hackers-owner@gnome.org */
	{ "Mailing-List", "[ \t]*list ([^@]+)@?([^ \n\t\r>;]*)" },
	/* Originator: gnome-hackers@gnome.org */
	{ "Originator", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* X-Mailing-List: <gnome-hackers@gnome.org> archive/latest/100 */
	/* X-Mailing-List: gnome-hackers@gnome.org */
	/* X-Mailing-List: gnome-hackers */
	/* X-Mailing-List: <gnome-hackers> */
	{ "X-Mailing-List", "[ \t]*<?([^@>]+)@?([^ \n\t\r>]*)" },
	/* X-Loop: gnome-hackers@gnome.org */
	{ "X-Loop", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* X-List: gnome-hackers */
	/* X-List: gnome-hackers@gnome.org */
	{ "X-List", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* Sender: owner-gnome-hackers@gnome.org */
	/* Sender: owner-gnome-hackers */
	{ "Sender", "[ \t]*owner-([^@]+)@?([^ @\n\t\r>]*)" },
	/* Sender: gnome-hackers-owner@gnome.org */
	/* Sender: gnome-hackers-owner */
	{ "Sender", "[ \t]*([^@]+)-owner@?([^ @\n\t\r>]*)" },
	/* Delivered-To: mailing list gnome-hackers@gnome.org */
	/* Delivered-To: mailing list gnome-hackers */
	{ "Delivered-To", "[ \t]*mailing list ([^@]+)@?([^ \n\t\r>]*)" },
	/* Return-Path: owner-gnome-hackers@gnome.org */
	/* Return-Path: <owner-gnome-hackers@gnome.org> */
	/* Return-Path: owner-gnome-hackers */
	/* Return-Path: <owner-gnome-hackers> */
	{ "Return-Path", "[ \t]*<?owner-([^@>]+)@?([^ \n\t\r>]*)" },
	/* X-BeenThere: gnome-hackers@gnome.org */
	/* X-BeenThere: gnome-hackers */
	{ "X-BeenThere", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
	/* List-Unsubscribe: <mailto:gnome-hackers-unsubscribe@gnome.org> */
	{ "List-Unsubscribe", "<mailto:(.+)-unsubscribe@([^ \n\t\r>]*)" },
};
4744
4745 static gpointer
mailing_list_init(gpointer param)4746 mailing_list_init (gpointer param)
4747 {
4748 gint i, errcode, failed = 0;
4749
4750 /* precompile regex's for speed at runtime */
4751 for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4752 errcode = regcomp (&mail_list_magic[i].regex, mail_list_magic[i].pattern, REG_EXTENDED | REG_ICASE);
4753 if (errcode != 0) {
4754 gchar *errstr;
4755 gsize len;
4756
4757 len = regerror (errcode, &mail_list_magic[i].regex, NULL, 0);
4758 errstr = g_malloc0 (len + 1);
4759 regerror (errcode, &mail_list_magic[i].regex, errstr, len);
4760
4761 g_warning ("Internal error, compiling regex failed: %s: %s", mail_list_magic[i].pattern, errstr);
4762 g_free (errstr);
4763 failed++;
4764 }
4765 }
4766
4767 g_warn_if_fail (failed == 0);
4768
4769 return NULL;
4770 }
4771
4772 /**
4773 * camel_headers_dup_mailing_list:
4774 * @headers: a #CamelNameValueArray with headers
4775 *
4776 * Searches for a mailing list information among known headers and returns
4777 * a newly allocated string with its value.
4778 *
4779 * Returns: (nullable) (transfer full): The mailing list header, or %NULL, if none is found
4780 **/
4781 gchar *
camel_headers_dup_mailing_list(const CamelNameValueArray * headers)4782 camel_headers_dup_mailing_list (const CamelNameValueArray *headers)
4783 {
4784 static GOnce once = G_ONCE_INIT;
4785 const gchar *v;
4786 regmatch_t match[3];
4787 gint i, j;
4788
4789 g_once (&once, mailing_list_init, NULL);
4790
4791 for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4792 v = camel_name_value_array_get_named (headers, CAMEL_COMPARE_CASE_INSENSITIVE, mail_list_magic[i].name);
4793 for (j = 0; j < 3; j++) {
4794 match[j].rm_so = -1;
4795 match[j].rm_eo = -1;
4796 }
4797 if (v != NULL && regexec (&mail_list_magic[i].regex, v, 3, match, 0) == 0 && match[1].rm_so != -1) {
4798 gint len1, len2;
4799 gchar *mlist;
4800
4801 len1 = match[1].rm_eo - match[1].rm_so;
4802 len2 = match[2].rm_eo - match[2].rm_so;
4803
4804 mlist = g_malloc (len1 + len2 + 2);
4805 memcpy (mlist, v + match[1].rm_so, len1);
4806 if (len2) {
4807 mlist[len1] = '@';
4808 memcpy (mlist + len1 + 1, v + match[2].rm_so, len2);
4809 mlist[len1 + len2 + 1] = '\0';
4810 } else {
4811 mlist[len1] = '\0';
4812 }
4813
4814 return mlist;
4815 }
4816 }
4817
4818 return NULL;
4819 }
4820
4821 /* ok, here's the address stuff, what a mess ... */
4822 CamelHeaderAddress *
camel_header_address_new(void)4823 camel_header_address_new (void)
4824 {
4825 CamelHeaderAddress *h;
4826 h = g_malloc0 (sizeof (*h));
4827 h->type = CAMEL_HEADER_ADDRESS_NONE;
4828 h->refcount = 1;
4829 return h;
4830 }
4831
4832 CamelHeaderAddress *
camel_header_address_new_name(const gchar * name,const gchar * addr)4833 camel_header_address_new_name (const gchar *name,
4834 const gchar *addr)
4835 {
4836 CamelHeaderAddress *h;
4837 h = camel_header_address_new ();
4838 h->type = CAMEL_HEADER_ADDRESS_NAME;
4839 h->name = g_strdup (name);
4840 h->v.addr = g_strdup (addr);
4841 return h;
4842 }
4843
4844 CamelHeaderAddress *
camel_header_address_new_group(const gchar * name)4845 camel_header_address_new_group (const gchar *name)
4846 {
4847 CamelHeaderAddress *h;
4848
4849 h = camel_header_address_new ();
4850 h->type = CAMEL_HEADER_ADDRESS_GROUP;
4851 h->name = g_strdup (name);
4852 return h;
4853 }
4854
4855 CamelHeaderAddress *
camel_header_address_ref(CamelHeaderAddress * addrlist)4856 camel_header_address_ref (CamelHeaderAddress *addrlist)
4857 {
4858 if (addrlist)
4859 addrlist->refcount++;
4860
4861 return addrlist;
4862 }
4863
4864 void
camel_header_address_unref(CamelHeaderAddress * addrlist)4865 camel_header_address_unref (CamelHeaderAddress *addrlist)
4866 {
4867 if (addrlist) {
4868 if (addrlist->refcount <= 1) {
4869 if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP) {
4870 camel_header_address_list_clear (&addrlist->v.members);
4871 } else if (addrlist->type == CAMEL_HEADER_ADDRESS_NAME) {
4872 g_free (addrlist->v.addr);
4873 }
4874 g_free (addrlist->name);
4875 g_free (addrlist);
4876 } else {
4877 addrlist->refcount--;
4878 }
4879 }
4880 }
4881
4882 void
camel_header_address_set_name(CamelHeaderAddress * addrlist,const gchar * name)4883 camel_header_address_set_name (CamelHeaderAddress *addrlist,
4884 const gchar *name)
4885 {
4886 if (addrlist) {
4887 g_free (addrlist->name);
4888 addrlist->name = g_strdup (name);
4889 }
4890 }
4891
4892 void
camel_header_address_set_addr(CamelHeaderAddress * addrlist,const gchar * addr)4893 camel_header_address_set_addr (CamelHeaderAddress *addrlist,
4894 const gchar *addr)
4895 {
4896 if (addrlist) {
4897 if (addrlist->type == CAMEL_HEADER_ADDRESS_NAME
4898 || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4899 addrlist->type = CAMEL_HEADER_ADDRESS_NAME;
4900 g_free (addrlist->v.addr);
4901 addrlist->v.addr = g_strdup (addr);
4902 } else {
4903 g_warning ("Trying to set the address on a group");
4904 }
4905 }
4906 }
4907
4908 /**
4909 * camel_header_address_set_members:
4910 * @addrlist: a #CamelHeaderAddress object
4911 * @group: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress
4912 *
4913 * TODO: Document me.
4914 *
4915 **/
4916 void
camel_header_address_set_members(CamelHeaderAddress * addrlist,CamelHeaderAddress * group)4917 camel_header_address_set_members (CamelHeaderAddress *addrlist,
4918 CamelHeaderAddress *group)
4919 {
4920 if (addrlist) {
4921 if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP
4922 || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4923 addrlist->type = CAMEL_HEADER_ADDRESS_GROUP;
4924 camel_header_address_list_clear (&addrlist->v.members);
4925 /* should this ref them? */
4926 addrlist->v.members = group;
4927 } else {
4928 g_warning ("Trying to set the members on a name, not group");
4929 }
4930 }
4931 }
4932
4933 void
camel_header_address_add_member(CamelHeaderAddress * addrlist,CamelHeaderAddress * member)4934 camel_header_address_add_member (CamelHeaderAddress *addrlist,
4935 CamelHeaderAddress *member)
4936 {
4937 if (addrlist) {
4938 if (addrlist->type == CAMEL_HEADER_ADDRESS_GROUP
4939 || addrlist->type == CAMEL_HEADER_ADDRESS_NONE) {
4940 addrlist->type = CAMEL_HEADER_ADDRESS_GROUP;
4941 camel_header_address_list_append (&addrlist->v.members, member);
4942 }
4943 }
4944 }
4945
4946 /**
4947 * camel_header_address_list_append_list:
4948 * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4949 * @addrs: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress to add
4950 *
4951 * TODO: Document me.
4952 *
4953 **/
4954 void
camel_header_address_list_append_list(CamelHeaderAddress ** addrlistp,CamelHeaderAddress ** addrs)4955 camel_header_address_list_append_list (CamelHeaderAddress **addrlistp,
4956 CamelHeaderAddress **addrs)
4957 {
4958 if (addrlistp) {
4959 CamelHeaderAddress *n = (CamelHeaderAddress *) addrlistp;
4960
4961 while (n->next)
4962 n = n->next;
4963 n->next = *addrs;
4964 }
4965 }
4966
4967 /**
4968 * camel_header_address_list_append:
4969 * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4970 * @addr: the #CamelHeaderAddress to add
4971 *
4972 * TODO: Document me.
4973 *
4974 **/
4975 void
camel_header_address_list_append(CamelHeaderAddress ** addrlistp,CamelHeaderAddress * addr)4976 camel_header_address_list_append (CamelHeaderAddress **addrlistp,
4977 CamelHeaderAddress *addr)
4978 {
4979 if (addr) {
4980 camel_header_address_list_append_list (addrlistp, &addr);
4981 addr->next = NULL;
4982 }
4983 }
4984
4985 /**
4986 * camel_header_address_list_clear:
4987 * @addrlistp: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
4988 *
4989 * TODO: Document me.
4990 *
4991 **/
4992 void
camel_header_address_list_clear(CamelHeaderAddress ** addrlistp)4993 camel_header_address_list_clear (CamelHeaderAddress **addrlistp)
4994 {
4995 CamelHeaderAddress *a, *n;
4996 a = *addrlistp;
4997 while (a) {
4998 n = a->next;
4999 camel_header_address_unref (a);
5000 a = n;
5001 }
5002 *addrlistp = NULL;
5003 }
5004
5005 static gchar *
maybe_quote_name(const gchar * name,gboolean * out_free_result)5006 maybe_quote_name (const gchar *name,
5007 gboolean *out_free_result)
5008 {
5009 if (out_free_result)
5010 *out_free_result = FALSE;
5011
5012 if (name && *name && (strchr (name, ',') || strchr (name, ';') || strchr (name, '\"') || strchr (name, '<') || strchr (name, '>'))) {
5013 GString *quoted;
5014
5015 if (out_free_result)
5016 *out_free_result = TRUE;
5017
5018 quoted = g_string_sized_new (strlen (name) + 2);
5019 g_string_append_c (quoted, '\"');
5020
5021 while (*name) {
5022 if (*name != '\"')
5023 g_string_append_c (quoted, *name);
5024 name++;
5025 }
5026
5027 g_string_append_c (quoted, '\"');
5028
5029 return g_string_free (quoted, FALSE);
5030 }
5031
5032 return (gchar *) name;
5033 }
5034
5035 /* if encode is true, then the result is suitable for mailing, otherwise
5036 * the result is suitable for display only (and may not even be re-parsable) */
5037 static void
header_address_list_encode_append(GString * out,gint encode,CamelHeaderAddress * a)5038 header_address_list_encode_append (GString *out,
5039 gint encode,
5040 CamelHeaderAddress *a)
5041 {
5042 while (a) {
5043 gchar *text = NULL;
5044 gboolean free_text = FALSE;
5045
5046 switch (a->type) {
5047 case CAMEL_HEADER_ADDRESS_NAME:
5048 if (encode)
5049 text = camel_header_encode_phrase ((guchar *) a->name);
5050 else
5051 text = maybe_quote_name (a->name, &free_text);
5052 if (text && *text)
5053 g_string_append_printf (out, "%s <%s>", text, a->v.addr);
5054 else
5055 g_string_append (out, a->v.addr);
5056 if (encode)
5057 g_free (text);
5058 break;
5059 case CAMEL_HEADER_ADDRESS_GROUP:
5060 if (encode)
5061 text = camel_header_encode_phrase ((guchar *) a->name);
5062 else
5063 text = maybe_quote_name (a->name, &free_text);
5064 g_string_append_printf (out, "%s: ", text);
5065 header_address_list_encode_append (out, encode, a->v.members);
5066 g_string_append_printf (out, ";");
5067 if (encode)
5068 g_free (text);
5069 break;
5070 default:
5071 g_warning ("Invalid address type");
5072 break;
5073 }
5074
5075 a = a->next;
5076 if (a)
5077 g_string_append (out, ", ");
5078
5079 if (free_text)
5080 g_free (text);
5081 }
5082 }
5083
5084 /**
5085 * camel_header_address_list_encode:
5086 * @addrlist: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
5087 *
5088 * TODO: Document me.
5089 *
5090 **/
5091 gchar *
camel_header_address_list_encode(CamelHeaderAddress * addrlist)5092 camel_header_address_list_encode (CamelHeaderAddress *addrlist)
5093 {
5094 GString *out;
5095
5096 if (!addrlist)
5097 return NULL;
5098
5099 out = g_string_new ("");
5100 header_address_list_encode_append (out, TRUE, addrlist);
5101 return g_string_free (out, FALSE);
5102 }
5103
5104 /**
5105 * camel_header_address_list_format:
5106 * @addrlist: (array zero-terminated=1): a NULL-terminated list of #CamelHeaderAddress objects
5107 *
5108 * TODO: Document me.
5109 *
5110 **/
5111 gchar *
camel_header_address_list_format(CamelHeaderAddress * addrlist)5112 camel_header_address_list_format (CamelHeaderAddress *addrlist)
5113 {
5114 GString *out;
5115
5116 if (!addrlist)
5117 return NULL;
5118
5119 out = g_string_new ("");
5120
5121 header_address_list_encode_append (out, FALSE, addrlist);
5122
5123 return g_string_free (out, FALSE);
5124 }
5125
5126 gchar *
camel_header_address_fold(const gchar * in,gsize headerlen)5127 camel_header_address_fold (const gchar *in,
5128 gsize headerlen)
5129 {
5130 gsize len, outlen;
5131 const gchar *inptr = in, *space, *p, *n;
5132 GString *out;
5133 gint i, needunfold = FALSE;
5134
5135 if (in == NULL)
5136 return NULL;
5137
5138 /* first, check to see if we even need to fold */
5139 len = headerlen + 2;
5140 p = in;
5141 while (*p) {
5142 n = strchr (p, '\n');
5143 if (n == NULL) {
5144 len += strlen (p);
5145 break;
5146 }
5147
5148 needunfold = TRUE;
5149 len += n - p;
5150
5151 if (len >= CAMEL_FOLD_SIZE)
5152 break;
5153 len = 0;
5154 p = n + 1;
5155 }
5156 if (len < CAMEL_FOLD_SIZE)
5157 return g_strdup (in);
5158
5159 /* we need to fold, so first unfold (if we need to), then process */
5160 if (needunfold)
5161 inptr = in = camel_header_unfold (in);
5162
5163 out = g_string_new ("");
5164 outlen = headerlen + 2;
5165 while (*inptr) {
5166 space = strchr (inptr, ' ');
5167 if (space) {
5168 len = space - inptr + 1;
5169 } else {
5170 len = strlen (inptr);
5171 }
5172
5173 d (printf ("next word '%.*s'\n", len, inptr));
5174
5175 if (outlen + len > CAMEL_FOLD_SIZE) {
5176 d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5177 /* strip trailing space */
5178 if (out->len > 0 && out->str[out->len - 1] == ' ')
5179 g_string_truncate (out, out->len - 1);
5180 g_string_append (out, "\n\t");
5181 outlen = 1;
5182 }
5183
5184 outlen += len;
5185 for (i = 0; i < len; i++) {
5186 g_string_append_c (out, inptr[i]);
5187 }
5188
5189 inptr += len;
5190 }
5191 if (needunfold)
5192 g_free ((gchar *) in);
5193
5194 return g_string_free (out, FALSE);
5195 }
5196
5197 /* simple header folding */
5198 /* will work even if the header is already folded */
5199 gchar *
camel_header_fold(const gchar * in,gsize headerlen)5200 camel_header_fold (const gchar *in,
5201 gsize headerlen)
5202 {
5203 gsize len, outlen, tmplen;
5204 const gchar *inptr = in, *space, *p, *n;
5205 GString *out;
5206 gint needunfold = FALSE;
5207 gchar spc;
5208
5209 if (in == NULL)
5210 return NULL;
5211
5212 /* first, check to see if we even need to fold */
5213 len = headerlen + 2;
5214 p = in;
5215 while (*p) {
5216 n = strchr (p, '\n');
5217 if (n == NULL) {
5218 len += strlen (p);
5219 break;
5220 }
5221
5222 needunfold = TRUE;
5223 len += n - p;
5224
5225 if (len >= CAMEL_FOLD_SIZE)
5226 break;
5227 len = 0;
5228 p = n + 1;
5229 }
5230 if (len < CAMEL_FOLD_SIZE)
5231 return g_strdup (in);
5232
5233 /* we need to fold, so first unfold (if we need to), then process */
5234 if (needunfold)
5235 inptr = in = camel_header_unfold (in);
5236
5237 out = g_string_new ("");
5238 outlen = headerlen + 2;
5239 while (*inptr) {
5240 space = inptr;
5241 while (*space && *space != ' ' && *space != '\t')
5242 space++;
5243
5244 if (*space)
5245 len = space - inptr + 1;
5246 else
5247 len = space - inptr;
5248
5249 d (printf ("next word '%.*s'\n", len, inptr));
5250 if (outlen + len > CAMEL_FOLD_SIZE) {
5251 d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5252 /* strip trailing space */
5253 if (out->len > 0 && (out->str[out->len - 1] == ' ' || out->str[out->len - 1] == '\t')) {
5254 spc = out->str[out->len - 1];
5255 g_string_truncate (out, out->len - 1);
5256 g_string_append_c (out, '\n');
5257 g_string_append_c (out, spc);
5258 outlen = 1;
5259 }
5260
5261 /* check for very long words, just cut them up */
5262 while (outlen + len > CAMEL_FOLD_MAX_SIZE) {
5263 tmplen = CAMEL_FOLD_MAX_SIZE - outlen;
5264 g_string_append_len (out, inptr, tmplen);
5265 g_string_append (out, "\n\t");
5266 inptr += tmplen;
5267 len -= tmplen;
5268 outlen = 1;
5269 }
5270 }
5271
5272 g_string_append_len (out, inptr, len);
5273 outlen += len;
5274 inptr += len;
5275 }
5276 if (needunfold)
5277 g_free ((gchar *) in);
5278
5279 return g_string_free (out, FALSE);
5280 }
5281
5282 gchar *
camel_header_unfold(const gchar * in)5283 camel_header_unfold (const gchar *in)
5284 {
5285 const gchar *inptr = in;
5286 gchar c, *o, *out;
5287
5288 if (in == NULL)
5289 return NULL;
5290
5291 out = g_malloc (strlen (in) + 1);
5292
5293 o = out;
5294 while ((c = *inptr++)) {
5295 if (c == '\n') {
5296 if (camel_mime_is_lwsp (*inptr)) {
5297 do {
5298 inptr++;
5299 } while (camel_mime_is_lwsp (*inptr));
5300 *o++ = ' ';
5301 } else {
5302 *o++ = c;
5303 }
5304 } else {
5305 *o++ = c;
5306 }
5307 }
5308 *o = 0;
5309
5310 return out;
5311 }
5312