1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4  *
5  * This library is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation.
8  *
9  * This library is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12  * for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library. If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Authors: Michael Zucchi <notzed@ximian.com>
18  */
19 
20 /* What should hopefully be a fast mail parser */
21 
22 /* Do not change this code without asking me (Michael Zucchi) first
23  *
24  * There is almost always a reason something was done a certain way.
25  */
26 
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32 #include <sys/types.h>
33 
34 #include "camel-mempool.h"
35 #include "camel-mime-filter.h"
36 #include "camel-mime-parser.h"
37 #include "camel-mime-utils.h"
38 #include "camel-stream.h"
39 
/* Debug hooks: each macro swallows its argument, so statements wrapped in
 * r(), h(), c() or d() compile to nothing.  Redefine one as "x" to enable
 * the wrapped debug output. */
#define r(x)
#define h(x)
#define c(x)
#define d(x)

/* NOTE(review): not referenced in the visible part of this file; presumably
 * consumed further down -- confirm before removing. */
#define PRESERVE_HEADERS

/* Uncomment to build with the Purify watch-point hooks guarded by
 * "#ifdef PURIFY" in this file. */
/*#define PURIFY*/

/* Selects the CamelMemPool-based header storage (see the "#ifdef MEMPOOL"
 * sections in this file). */
#define MEMPOOL

#ifdef PURIFY
/* ids handed to purify_watch_remove() */
gint inend_id = -1,
  inbuffer_id = -1;
#endif

#define SCAN_BUF 4096		/* size of read buffer */
#define SCAN_HEAD 128		/* headroom guaranteed to be before each read buffer */

/* A little hacky: the scanner state struct is simply the parser's private
 * struct under another name, so nothing had to be renamed. */
#define _header_scan_state _CamelMimeParserPrivate
/* Fetch the private scanner state from any pointer to a CamelMimeParser. */
#define _PRIVATE(obj) (((CamelMimeParser *)(obj))->priv)
62 
/* A raw RFC 822 header, kept as one node of a singly linked list. */
/* The value MUST be US-ASCII. */
typedef struct _camel_header_raw {
	struct _camel_header_raw *next;	/* next header in the list, or NULL */
	gchar *name;		/* header field name */
	gchar *value;		/* header field value */
	gint offset;		/* in file, if known */
} CamelHeaderRaw;
71 
/* Private state of one CamelMimeParser (aliased to _CamelMimeParserPrivate
 * through the _header_scan_state macro). */
struct _header_scan_state {

    /* global state */

	CamelMimeParserState state;	/* state last reported by the scanner */

	/* for building headers during scanning */
	gchar *outbuf;
	gchar *outptr;
	gchar *outend;

	/* Input source.  folder_read() tries 'stream' first, then
	 * 'input_stream', and falls back to 'fd'. */
	gint fd;			/* input for a fd input */
	CamelStream *stream;		/* or for a stream */
	GInputStream *input_stream;	/* or for a GInputStream */

	gint ioerrno;		/* io error state */

	/* for scanning input buffers */
	gchar *realbuf;		/* the real buffer, SCAN_HEAD *2 + SCAN_BUF bytes */
	gchar *inbuf;		/* points to a subset of the allocated memory, the underflow */
	gchar *inptr;		/* (up to SCAN_HEAD) is for use by filters so they don't copy all data */
	gchar *inend;		/* one past the last valid byte in inbuf */

	gint atleast;		/* minimum number of unread bytes folder_read() tries to keep buffered */

	goffset seek;		/* current offset to start of buffer */
	gint unstep;		/* how many states to 'unstep' (repeat the current state) */

	guint midline:1;		/* are we mid-line interrupted? */
	guint check_header_folded:1;	/* check whether header is folded first? */
	guint scan_from:1;	/* do we care about From lines? */
	guint scan_pre_from:1;	/* do we return pre-from data? */
	guint eof:1;		/* reached eof? */

	goffset start_of_from;	/* where from started */
	goffset start_of_boundary; /* where the last boundary started */
	goffset start_of_headers;	/* where headers started from the last scan */

	goffset header_start;	/* start of last header, or -1 */

	/* filters to apply to all content before output */
	gint filterid;		/* id of next filter */
	struct _header_scan_filter *filters;	/* head of the filter list, in the order added */

    /* per message/part info */
	struct _header_scan_stack *parts;	/* the current part; parents are reached through ->parent */

};
120 
/* One entry of the part stack: everything the scanner tracks for a single
 * message or MIME part.  Entries link to their enclosing part via 'parent'. */
struct _header_scan_stack {
	struct _header_scan_stack *parent;

	CamelMimeParserState savestate; /* state at invocation of this part */

#ifdef MEMPOOL
	CamelMemPool *pool;	/* memory pool to keep track of headers/etc at this level */
#endif
	CamelHeaderRaw *headers;	/* headers for this part */

	CamelContentType *content_type;

	/* GString is not used because a buffer cannot be appended to it efficiently */
	GByteArray *pretext;	/* for multipart types, save the pre-boundary data here */
	GByteArray *posttext;	/* for multipart types, save the post-boundary data here */
	gint prestage;		/* used to determine if it is a pre-boundary or post-boundary data segment */

	GByteArray *from_line;	/* the from line */

	gchar *boundary;		/* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
	gint boundarylen;	/* actual length of boundary, including leading -- if there is one */
	gint boundarylenfinal;	/* length of boundary, including trailing -- if there is one */
	gint atleast;		/* the biggest boundary from here to the parent */
};
145 
/* One node of the parser's singly linked filter list.
 *
 * NOTE(review): list-walking code in this file may cast the address of the
 * list head pointer to a node and rely on 'next' being the first member --
 * check the filter add/remove functions before reordering these fields. */
struct _header_scan_filter {
	struct _header_scan_filter *next;	/* next filter in the list, or NULL */
	gint id;			/* id returned by camel_mime_parser_filter_add() */
	CamelMimeFilter *filter;	/* the filter; the list holds a reference on it */
};
151 
/* Forward declarations of the file-local scanner implementation, which the
 * public camel_mime_parser_*() wrappers below call into. */
static void folder_scan_reset (struct _header_scan_state *s);
static void folder_scan_step (struct _header_scan_state *s, gchar **databuffer, gsize *datalength);
static void folder_scan_drop_step (struct _header_scan_state *s);
static gint folder_scan_init_with_fd (struct _header_scan_state *s, gint fd);
static gint folder_scan_init_with_stream (struct _header_scan_state *s, CamelStream *stream, GError **error);
static struct _header_scan_state *folder_scan_init (void);
static void folder_scan_close (struct _header_scan_state *s);
static struct _header_scan_stack *folder_scan_content (struct _header_scan_state *s, gint *lastone, gchar **data, gsize *length);
static struct _header_scan_stack *folder_scan_header (struct _header_scan_state *s, gint *lastone);
static gint folder_scan_skip_line (struct _header_scan_state *s, GByteArray *save);
static goffset folder_seek (struct _header_scan_state *s, goffset offset, gint whence);
static goffset folder_tell (struct _header_scan_state *s);
static gint folder_read (struct _header_scan_state *s);
static void folder_push_part (struct _header_scan_state *s, struct _header_scan_stack *h);

static const gchar * header_raw_find (CamelHeaderRaw **list, const gchar *name, gint *offset);

/* Header storage helpers; which set exists depends on MEMPOOL. */
#ifdef MEMPOOL
static void header_append_mempool (struct _header_scan_state *s, struct _header_scan_stack *h, gchar *header, gint offset);
#else
static void header_raw_free (CamelHeaderRaw *l);
static void header_raw_clear (CamelHeaderRaw *l);
#endif
175 
/* Printable state names, indexed by s->state in the d() debug printfs.
 * With d(x) defined empty the condition below reads "#if 0", so the table is
 * only compiled when the d() debug macro is enabled.
 *
 * NOTE(review): "CAMEL_MIME_PARSER_STATE_HEAER_END" looks like a typo for
 * HEADER_END, and the order presumably has to follow the
 * CamelMimeParserState enum -- confirm against the header. */
#if d(!)0
static gchar *states[] = {
	"CAMEL_MIME_PARSER_STATE_INITIAL",
	"CAMEL_MIME_PARSER_STATE_PRE_FROM",	/* pre-from data */
	"CAMEL_MIME_PARSER_STATE_FROM",		/* got 'From' line */
	"CAMEL_MIME_PARSER_STATE_HEADER",		/* toplevel header */
	"CAMEL_MIME_PARSER_STATE_BODY",		/* scanning body of message */
	"CAMEL_MIME_PARSER_STATE_MULTIPART",	/* got multipart header */
	"CAMEL_MIME_PARSER_STATE_MESSAGE",	/* rfc822/news message */

	"CAMEL_MIME_PARSER_STATE_PART",		/* part of a multipart */

	"CAMEL_MIME_PARSER_STATE_EOF",		/* end of file */
	"CAMEL_MIME_PARSER_STATE_PRE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_HEAER_END",
	"CAMEL_MIME_PARSER_STATE_BODY_END",
	"CAMEL_MIME_PARSER_STATE_MULTIPART_END",
	"CAMEL_MIME_PARSER_STATE_MESSAGE_END",
};
#endif
197 
/* Registers CamelMimeParser as a GObject subclass; this is what provides
 * camel_mime_parser_parent_class used when chaining up in finalize. */
G_DEFINE_TYPE (CamelMimeParser, camel_mime_parser, G_TYPE_OBJECT)
199 
200 static void
201 mime_parser_finalize (GObject *object)
202 {
203 	struct _header_scan_state *s = _PRIVATE (object);
204 
205 #ifdef PURIFY
206 	purify_watch_remove_all ();
207 #endif
208 
209 	folder_scan_close (s);
210 
211 	/* Chain up to parent's finalize() method. */
212 	G_OBJECT_CLASS (camel_mime_parser_parent_class)->finalize (object);
213 }
214 
215 static void
camel_mime_parser_class_init(CamelMimeParserClass * class)216 camel_mime_parser_class_init (CamelMimeParserClass *class)
217 {
218 	GObjectClass *object_class;
219 
220 	object_class = G_OBJECT_CLASS (class);
221 	object_class->finalize = mime_parser_finalize;
222 }
223 
224 static void
camel_mime_parser_init(CamelMimeParser * parser)225 camel_mime_parser_init (CamelMimeParser *parser)
226 {
227 	parser->priv = folder_scan_init ();
228 }
229 
230 /**
231  * camel_mime_parser_new:
232  *
233  * Create a new CamelMimeParser object.
234  *
235  * Returns: (transfer full): A new #CamelMimeParser object
236  **/
237 CamelMimeParser *
camel_mime_parser_new(void)238 camel_mime_parser_new (void)
239 {
240 	return g_object_new (CAMEL_TYPE_MIME_PARSER, NULL);
241 }
242 
243 /**
244  * camel_mime_parser_filter_add:
245  * @m: a #CamelMimeParser
246  * @mf: a #CamelMimeFilter
247  *
248  * Add a filter that will be applied to any body content before it is passed
249  * to the caller.  Filters may be pipelined to perform multi-pass operations
250  * on the content, and are applied in the order they were added.
251  *
252  * Note that filters are only applied to the body content of messages, and once
253  * a filter has been set, all content returned by a camel_mime_parser_step()
254  * with a state of CAMEL_MIME_PARSER_STATE_BODY will have passed through the
255  * filter.
256  *
257  * Returns: An id that may be passed to camel_mime_parser_filter_remove() to
258  * remove the filter, or -1 if the operation failed.
259  *
260  * Since: 2.22
261  **/
262 gint
camel_mime_parser_filter_add(CamelMimeParser * m,CamelMimeFilter * mf)263 camel_mime_parser_filter_add (CamelMimeParser *m,
264                               CamelMimeFilter *mf)
265 {
266 	struct _header_scan_state *s = _PRIVATE (m);
267 	struct _header_scan_filter *f, *new;
268 
269 	new = g_malloc (sizeof (*new));
270 	new->filter = mf;
271 	new->id = s->filterid++;
272 	if (s->filterid == -1)
273 		s->filterid++;
274 	new->next = NULL;
275 	g_object_ref (G_OBJECT (mf));
276 
277 	/* yes, this is correct, since 'next' is the first element of the struct */
278 	f = (struct _header_scan_filter *) &s->filters;
279 	while (f->next)
280 		f = f->next;
281 	f->next = new;
282 	return new->id;
283 }
284 
285 /**
286  * camel_mime_parser_filter_remove:
287  * @m: a #CamelMimeParser
288  * @id: id of the filter to remove, as returned from camel_mime_parser_filter_add()
289  *
290  * Remove a processing filter from the pipeline.  There is no
291  * restriction on the order the filters can be removed.
292  *
293  * Since: 2.22
294  **/
295 void
camel_mime_parser_filter_remove(CamelMimeParser * m,gint id)296 camel_mime_parser_filter_remove (CamelMimeParser *m,
297                                  gint id)
298 {
299 	struct _header_scan_state *s = _PRIVATE (m);
300 	struct _header_scan_filter *f, *old;
301 
302 	f = (struct _header_scan_filter *) &s->filters;
303 	while (f && f->next) {
304 		old = f->next;
305 		if (old->id == id) {
306 			g_object_unref (old->filter);
307 			f->next = old->next;
308 			g_free (old);
309 			/* there should only be a single matching id, but
310 			 * scan the whole lot anyway */
311 		}
312 		f = f->next;
313 	}
314 }
315 
316 /**
317  * camel_mime_parser_header:
318  * @m: a #CamelMimeParser
319  * @name: Name of header.
320  * @offset: Pointer that can receive the offset of the header in
321  * the stream from the start of parsing.
322  *
323  * Lookup a header by name.
324  *
325  * Returns: The header value, or NULL if the header is not
326  * defined.
327  **/
328 const gchar *
camel_mime_parser_header(CamelMimeParser * m,const gchar * name,gint * offset)329 camel_mime_parser_header (CamelMimeParser *m,
330                           const gchar *name,
331                           gint *offset)
332 {
333 	struct _header_scan_state *s = _PRIVATE (m);
334 
335 	if (s->parts && s->parts->headers)
336 		return header_raw_find (&s->parts->headers, name, offset);
337 
338 	return NULL;
339 }
340 
341 /**
342  * camel_mime_parser_dup_headers:
343  * @m: a #CamelMimeParser
344  *
345  * Get the list of the raw headers which are defined for the
346  * current state of the parser.  These headers are valid
347  * until the next call to camel_mime_parser_step(), or camel_mime_parser_drop_step().
348  *
349  * Returns: (transfer full): The headers, or %NULL, if there are no headers
350  * defined for the current part or state. Free it with camel_name_value_array_free().
351  *
352  * Since: 3.24
353  **/
354 CamelNameValueArray *
camel_mime_parser_dup_headers(CamelMimeParser * m)355 camel_mime_parser_dup_headers (CamelMimeParser *m)
356 {
357 	struct _header_scan_state *s = _PRIVATE (m);
358 
359 	if (s->parts) {
360 		CamelHeaderRaw *header = s->parts->headers;
361 		CamelNameValueArray *header_copy = camel_name_value_array_new ();
362 		while (header) {
363 			camel_name_value_array_append (header_copy, header->name, header->value);
364 			header = header->next;
365 		}
366 
367 		return header_copy;
368 	}
369 	return NULL;
370 }
371 
372 static const gchar *
byte_array_to_string(GByteArray * array)373 byte_array_to_string (GByteArray *array)
374 {
375 	if (array == NULL)
376 		return NULL;
377 
378 	if (array->len == 0 || array->data[array->len - 1] != '\0')
379 		g_byte_array_append (array, (guint8 *) "", 1);
380 
381 	return (const gchar *) array->data;
382 }
383 
384 /**
385  * camel_mime_parser_preface:
386  * @m: a #CamelMimeParser
387  *
388  * Retrieve the preface text for the current multipart.
389  * Can only be used when the state is CAMEL_MIME_PARSER_STATE_MULTIPART_END.
390  *
391  * Returns: The preface text, or NULL if there wasn't any.
392  *
393  * Since: 2.22
394  **/
395 const gchar *
camel_mime_parser_preface(CamelMimeParser * m)396 camel_mime_parser_preface (CamelMimeParser *m)
397 {
398 	struct _header_scan_state *s = _PRIVATE (m);
399 
400 	if (s->parts)
401 		return byte_array_to_string (s->parts->pretext);
402 
403 	return NULL;
404 }
405 
406 /**
407  * camel_mime_parser_postface:
408  * @m: a #CamelMimeParser
409  *
410  * Retrieve the postface text for the current multipart.
411  * Only returns valid data when the current state if
412  * CAMEL_MIME_PARSER_STATE_MULTIPART_END.
413  *
414  * Returns: The postface text, or NULL if there wasn't any.
415  *
416  * Since: 2.22
417  **/
418 const gchar *
camel_mime_parser_postface(CamelMimeParser * m)419 camel_mime_parser_postface (CamelMimeParser *m)
420 {
421 	struct _header_scan_state *s = _PRIVATE (m);
422 
423 	if (s->parts)
424 		return byte_array_to_string (s->parts->posttext);
425 
426 	return NULL;
427 }
428 
429 /**
430  * camel_mime_parser_from_line:
431  * @m: a #CamelMimeParser
432  *
433  * Get the last scanned "From " line, from a recently scanned from.
434  * This should only be called in the CAMEL_MIME_PARSER_STATE_FROM state.  The
435  * from line will include the closing \n found (if there was one).
436  *
437  * The return value will remain valid while in the CAMEL_MIME_PARSER_STATE_FROM
438  * state, or any deeper state.
439  *
440  * Returns: The From line, or NULL if called out of context.
441  *
442  * Since: 2.22
443  **/
444 const gchar *
camel_mime_parser_from_line(CamelMimeParser * m)445 camel_mime_parser_from_line (CamelMimeParser *m)
446 {
447 	struct _header_scan_state *s = _PRIVATE (m);
448 
449 	if (s->parts)
450 		return byte_array_to_string (s->parts->from_line);
451 
452 	return NULL;
453 }
454 
455 /**
456  * camel_mime_parser_init_with_fd:
457  * @m: a #CamelMimeParser
458  * @fd: A valid file descriptor.
459  *
460  * Initialise the scanner with an fd.  The scanner's offsets
461  * will be relative to the current file position of the file
462  * descriptor.  As a result, seekable descritors should
463  * be seeked using the parser seek functions.
464  *
465  * Returns: Returns -1 on error.
466  **/
467 gint
camel_mime_parser_init_with_fd(CamelMimeParser * m,gint fd)468 camel_mime_parser_init_with_fd (CamelMimeParser *m,
469                                 gint fd)
470 {
471 	struct _header_scan_state *s = _PRIVATE (m);
472 
473 	return folder_scan_init_with_fd (s, fd);
474 }
475 
476 /**
477  * camel_mime_parser_init_with_stream:
478  * @m: a #CamelMimeParser
479  * @stream: a #CamelStream to init with
480  * @error: return location for a #GError, or %NULL
481  *
482  * Initialise the scanner with a source stream.  The scanner's
483  * offsets will be relative to the current file position of
484  * the stream.  As a result, seekable streams should only
485  * be seeked using the parser seek function.
486  *
487  * Returns: -1 on error.
488  **/
489 gint
camel_mime_parser_init_with_stream(CamelMimeParser * parser,CamelStream * stream,GError ** error)490 camel_mime_parser_init_with_stream (CamelMimeParser *parser,
491                                     CamelStream *stream,
492                                     GError **error)
493 {
494 	struct _header_scan_state *s = _PRIVATE (parser);
495 
496 	return folder_scan_init_with_stream (s, stream, error);
497 }
498 
499 /**
500  * camel_mime_parser_init_with_input_stream:
501  * @parser: a #CamelMimeParser
502  * @input_stream: a #GInputStream
503  *
504  * Initialize the scanner with @input_stream.  The scanner's offsets will
505  * be relative to the current file position of the stream.  As a result,
506  * seekable streams should only be seeked using the parser seek function.
507  *
508  * Since: 3.12
509  **/
510 void
camel_mime_parser_init_with_input_stream(CamelMimeParser * parser,GInputStream * input_stream)511 camel_mime_parser_init_with_input_stream (CamelMimeParser *parser,
512                                           GInputStream *input_stream)
513 {
514 	struct _header_scan_state *s = _PRIVATE (parser);
515 
516 	folder_scan_reset (s);
517 	s->input_stream = g_object_ref (input_stream);
518 }
519 
520 /**
521  * camel_mime_parser_init_with_bytes:
522  * @parser: a #CamelMimeParser
523  * @bytes: a #GBytes containing the message content
524  *
525  * Convenience function creates a #GMemoryInputStream from @bytes and hands
526  * it off to camel_mime_parser_init_with_input_stream().
527  *
528  * Since: 3.12
529  **/
530 void
camel_mime_parser_init_with_bytes(CamelMimeParser * parser,GBytes * bytes)531 camel_mime_parser_init_with_bytes (CamelMimeParser *parser,
532                                    GBytes *bytes)
533 {
534 	GInputStream *input_stream;
535 
536 	g_return_if_fail (CAMEL_IS_MIME_PARSER (parser));
537 	g_return_if_fail (bytes != NULL);
538 
539 	input_stream = g_memory_input_stream_new_from_bytes (bytes);
540 	camel_mime_parser_init_with_input_stream (parser, input_stream);
541 	g_object_unref (input_stream);
542 }
543 
544 /**
545  * camel_mime_parser_scan_from:
546  * @parser: MIME parser object
547  * @scan_from: %TRUE if the scanner should scan From lines.
548  *
549  * Tell the scanner if it should scan "^From " lines or not.
550  *
551  * If the scanner is scanning from lines, two additional
552  * states CAMEL_MIME_PARSER_STATE_FROM and CAMEL_MIME_PARSER_STATE_FROM_END will be returned
553  * to the caller during parsing.
554  *
555  * This may also be preceeded by an optional
556  * CAMEL_MIME_PARSER_STATE_PRE_FROM state which contains the scanned data
557  * found before the From line is encountered.  See also
558  * camel_mime_parser_scan_pre_from().
559  **/
560 void
camel_mime_parser_scan_from(CamelMimeParser * parser,gboolean scan_from)561 camel_mime_parser_scan_from (CamelMimeParser *parser,
562                              gboolean scan_from)
563 {
564 	struct _header_scan_state *s = _PRIVATE (parser);
565 
566 	s->scan_from = scan_from;
567 }
568 
569 /**
570  * camel_mime_parser_scan_pre_from:
571  * @parser: MIME parser object
572  * @scan_pre_from: %TRUE if we want to get pre-from data.
573  *
574  * Tell the scanner whether we want to know abou the pre-from
575  * data during a scan.  If we do, then we may get an additional
576  * state CAMEL_MIME_PARSER_STATE_PRE_FROM which returns the specified data.
577  **/
578 void
camel_mime_parser_scan_pre_from(CamelMimeParser * parser,gboolean scan_pre_from)579 camel_mime_parser_scan_pre_from (CamelMimeParser *parser,
580                                  gboolean scan_pre_from)
581 {
582 	struct _header_scan_state *s = _PRIVATE (parser);
583 
584 	s->scan_pre_from = scan_pre_from;
585 }
586 
587 /**
588  * camel_mime_parser_content_type:
589  * @parser: MIME parser object
590  *
591  * Get the content type defined in the current part.
592  *
593  * Returns: A content_type structure, or NULL if there
594  * is no content-type defined for this part of state of the
595  * parser.
596  **/
597 CamelContentType *
camel_mime_parser_content_type(CamelMimeParser * parser)598 camel_mime_parser_content_type (CamelMimeParser *parser)
599 {
600 	struct _header_scan_state *s = _PRIVATE (parser);
601 
602 	/* FIXME: should this search up until it's found the 'right'
603 	 * content-type?  can it? */
604 	if (s->parts)
605 		return s->parts->content_type;
606 
607 	return NULL;
608 }
609 
610 /**
611  * camel_mime_parser_unstep:
612  * @parser: MIME parser object
613  *
614  * Cause the last step operation to repeat itself.  If this is
615  * called repeated times, then the same step will be repeated
616  * that many times.
617  *
618  * Note that it is not possible to scan back using this function,
619  * only to have a way of peeking the next state.
620  **/
621 void
camel_mime_parser_unstep(CamelMimeParser * parser)622 camel_mime_parser_unstep (CamelMimeParser *parser)
623 {
624 	struct _header_scan_state *s = _PRIVATE (parser);
625 
626 	s->unstep++;
627 }
628 
629 /**
630  * camel_mime_parser_drop_step:
631  * @parser: MIME parser object
632  *
633  * Drop the last step call.  This should only be used
634  * in conjunction with seeking of the stream as the
635  * stream may be in an undefined state relative to the
636  * state of the parser.
637  *
638  * Use this call with care.
639  **/
640 void
camel_mime_parser_drop_step(CamelMimeParser * parser)641 camel_mime_parser_drop_step (CamelMimeParser *parser)
642 {
643 	struct _header_scan_state *s = _PRIVATE (parser);
644 
645 	s->unstep = 0;
646 	folder_scan_drop_step (s);
647 }
648 
649 /**
650  * camel_mime_parser_step:
651  * @parser: MIME parser object
652  * @databuffer: (inout) (array length=datalength) (optional) (element-type guint8): Pointer to
653  * accept a pointer to the data associated with this step (if any).  May be %NULL,
654  * in which case datalength is also ingored.
655  * @datalength: (inout) (optional): Pointer to accept a pointer to the data
656  * length associated with this step (if any).
657  *
658  * Parse the next part of the MIME message.  If camel_mime_parser_unstep()
659  * has been called, then continue to return the same state
660  * for that many calls.
661  *
662  * If the step is CAMEL_MIME_PARSER_STATE_BODY then the databuffer and datalength
663  * pointers will be setup to point to the internal data buffer
664  * of the scanner and may be processed as required.  Any
665  * filters will have already been applied to this data.
666  *
667  * Refer to the state diagram elsewhere for a full listing of
668  * the states an application is gauranteed to get from the
669  * scanner.
670  *
671  * Returns: The current new state of the parser
672  * is returned.
673  **/
674 CamelMimeParserState
camel_mime_parser_step(CamelMimeParser * parser,gchar ** databuffer,gsize * datalength)675 camel_mime_parser_step (CamelMimeParser *parser,
676                         gchar **databuffer,
677                         gsize *datalength)
678 {
679 	struct _header_scan_state *s = _PRIVATE (parser);
680 
681 	d (printf ("OLD STATE:  '%s' :\n", states[s->state]));
682 
683 	if (s->unstep <= 0) {
684 		gchar *dummy;
685 		gsize dummylength;
686 
687 		if (databuffer == NULL) {
688 			databuffer = &dummy;
689 			datalength = &dummylength;
690 		}
691 
692 		folder_scan_step (s, databuffer, datalength);
693 	} else
694 		s->unstep--;
695 
696 	d (printf ("NEW STATE:  '%s' :\n", states[s->state]));
697 
698 	return s->state;
699 }
700 
701 /**
702  * camel_mime_parser_read:
703  * @parser: MIME parser object
704  * @databuffer: (out) (array) (element-type guint8): The data buffer
705  * @len: The length of data to read
706  * @error: return location for a #GError, or %NULL
707  *
708  * Read at most @len bytes from the internal mime parser buffer.
709  *
710  * Returns the address of the internal buffer in @databuffer,
711  * and the length of useful data.
712  *
713  * @len may be specified as %G_MAXSSIZE, in which case you will
714  * get the full remainder of the buffer at each call.
715  *
716  * Note that no parsing of the data read through this function
717  * occurs, so no state changes occur, but the seek position
718  * is updated appropriately.
719  *
720  * Returns: The number of bytes available, or -1 on error.
721  **/
722 gssize
camel_mime_parser_read(CamelMimeParser * parser,const gchar ** databuffer,gssize len,GError ** error)723 camel_mime_parser_read (CamelMimeParser *parser,
724                         const gchar **databuffer,
725                         gssize len,
726                         GError **error)
727 {
728 	struct _header_scan_state *s = _PRIVATE (parser);
729 	gintptr there;
730 
731 	if (len == 0)
732 		return 0;
733 
734 	d (printf ("parser::read() reading %d bytes\n", len));
735 
736 	there = MIN (s->inend - s->inptr, len);
737 	d (printf ("parser::read() there = %d bytes\n", there));
738 	if (there > 0) {
739 		*databuffer = s->inptr;
740 		s->inptr += there;
741 		return there;
742 	}
743 
744 	if (folder_read (s) == -1) {
745 		gint err = camel_mime_parser_errno (parser);
746 
747 		g_set_error (
748 			error, G_IO_ERROR,
749 			g_io_error_from_errno (err),
750 			"%s", g_strerror (err));
751 		return -1;
752 	}
753 
754 	there = MIN (s->inend - s->inptr, len);
755 	d (printf ("parser::read() had to re-read, now there = %d bytes\n", there));
756 
757 	*databuffer = s->inptr;
758 	s->inptr += there;
759 
760 	return there;
761 }
762 
763 /**
764  * camel_mime_parser_tell:
765  * @parser: MIME parser object
766  *
767  * Return the current scanning offset.  The meaning of this
768  * value will depend on the current state of the parser.
769  *
770  * An incomplete listing of the states:
771  *
772  * CAMEL_MIME_PARSER_STATE_INITIAL, The start of the current message.
773  * CAMEL_MIME_PARSER_STATE_HEADER, CAMEL_MIME_PARSER_STATE_MESSAGE, CAMEL_MIME_PARSER_STATE_MULTIPART, the character
774  * position immediately after the end of the header.
775  * CAMEL_MIME_PARSER_STATE_BODY, Position within the message of the start
776  * of the current data block.
777  * CAMEL_MIME_PARSER_STATE_*_END, The position of the character starting
778  * the next section of the scan (the last position + 1 of
779  * the respective current state).
780  *
781  * Returns: See above.
782  *
783  * Since: 2.22
784  **/
785 goffset
camel_mime_parser_tell(CamelMimeParser * parser)786 camel_mime_parser_tell (CamelMimeParser *parser)
787 {
788 	struct _header_scan_state *s = _PRIVATE (parser);
789 
790 	return folder_tell (s);
791 }
792 
793 /**
794  * camel_mime_parser_tell_start_headers:
795  * @parser: MIME parser object
796  *
797  * Find out the position within the file of where the
798  * headers started, this is cached by the parser
799  * at the time.
800  *
801  * Returns: The header start position, or -1 if
802  * no headers were scanned in the current state.
803  *
804  * Since: 2.22
805  **/
806 goffset
camel_mime_parser_tell_start_headers(CamelMimeParser * parser)807 camel_mime_parser_tell_start_headers (CamelMimeParser *parser)
808 {
809 	struct _header_scan_state *s = _PRIVATE (parser);
810 
811 	return s->start_of_headers;
812 }
813 
814 /**
815  * camel_mime_parser_tell_start_from:
816  * @parser: MIME parser object
817  *
818  * If the parser is scanning From lines, then this returns
819  * the position of the start of the From line.
820  *
821  * Returns: The start of the from line, or -1 if there
822  * was no From line, or From lines are not being scanned.
823  *
824  * Since: 2.22
825  **/
826 goffset
camel_mime_parser_tell_start_from(CamelMimeParser * parser)827 camel_mime_parser_tell_start_from (CamelMimeParser *parser)
828 {
829 	struct _header_scan_state *s = _PRIVATE (parser);
830 
831 	return s->start_of_from;
832 }
833 
834 /**
835  * camel_mime_parser_tell_start_boundary:
836  * @parser: MIME parser object
837  *
838  * When parsing a multipart, this returns the start of the last
839  * boundary.
840  *
841  * Returns: The start of the boundary, or -1 if there
842  * was no boundary encountered yet.
843  *
844  * Since: 2.22
845  **/
846 goffset
camel_mime_parser_tell_start_boundary(CamelMimeParser * parser)847 camel_mime_parser_tell_start_boundary (CamelMimeParser *parser)
848 {
849 	struct _header_scan_state *s = _PRIVATE (parser);
850 
851 	return s->start_of_boundary;
852 }
853 
854 /**
855  * camel_mime_parser_seek:
856  * @parser: MIME parser object
857  * @offset: Number of bytes to offset the seek by.
858  * @whence: SEEK_SET, SEEK_CUR, SEEK_END
859  *
860  * Reset the source position to a known value.
861  *
862  * Note that if the source stream/descriptor was not
863  * positioned at 0 to begin with, and an absolute seek
864  * is specified (whence != SEEK_CUR), then the seek
865  * position may not match the desired seek position.
866  *
867  * Returns: The new seek offset, or -1 on
868  * an error (for example, trying to seek on a non-seekable
869  * stream or file descriptor).
870  *
871  * Since: 2.22
872  **/
873 goffset
camel_mime_parser_seek(CamelMimeParser * parser,goffset offset,gint whence)874 camel_mime_parser_seek (CamelMimeParser *parser,
875                         goffset offset,
876                         gint whence)
877 {
878 	struct _header_scan_state *s = _PRIVATE (parser);
879 
880 	return folder_seek (s, offset, whence);
881 }
882 
883 /**
884  * camel_mime_parser_state:
885  * @parser: MIME parser object
886  *
887  * Get the current parser state.
888  *
889  * Returns: The current parser state.
890  **/
891 CamelMimeParserState
camel_mime_parser_state(CamelMimeParser * parser)892 camel_mime_parser_state (CamelMimeParser *parser)
893 {
894 	struct _header_scan_state *s = _PRIVATE (parser);
895 
896 	return s->state;
897 }
898 
899 /**
900  * camel_mime_parser_push_state:
901  * @mp: MIME parser object
902  * @newstate: New state
903  * @boundary: Boundary marker for state.
904  *
905  * Pre-load a new parser state.  Used to post-parse multipart content
906  * without headers.
907  **/
908 void
camel_mime_parser_push_state(CamelMimeParser * mp,CamelMimeParserState newstate,const gchar * boundary)909 camel_mime_parser_push_state (CamelMimeParser *mp,
910                               CamelMimeParserState newstate,
911                               const gchar *boundary)
912 {
913 	struct _header_scan_stack *h;
914 	struct _header_scan_state *s = _PRIVATE (mp);
915 	gsize boundary_len;
916 
917 	h = g_malloc0 (sizeof (*h));
918 	h->boundarylen = strlen (boundary) + 2;
919 	h->boundarylenfinal = h->boundarylen + 2;
920 	boundary_len = h->boundarylen + 3;
921 	h->boundary = g_malloc (boundary_len);
922 	g_snprintf (h->boundary, boundary_len, "--%s--", boundary);
923 	folder_push_part (s, h);
924 	s->state = newstate;
925 }
926 
927 /**
928  * camel_mime_parser_stream:
929  * @parser: MIME parser object
930  *
931  * Get the stream, if any, the parser has been initialised
932  * with.  May be used to setup sub-streams, but should not
933  * be read from directly (without saving and restoring
934  * the seek position in between).
935  *
936  * Returns: (transfer none) (nullable): The stream from camel_mime_parser_init_with_stream(),
937  * or NULL if the parser is reading from a file descriptor or is
938  * uninitialised.
939  **/
940 CamelStream *
camel_mime_parser_stream(CamelMimeParser * parser)941 camel_mime_parser_stream (CamelMimeParser *parser)
942 {
943 	struct _header_scan_state *s = _PRIVATE (parser);
944 
945 	return s->stream;
946 }
947 
948 /* Return errno of the parser, incase any error occurred during processing */
949 gint
camel_mime_parser_errno(CamelMimeParser * parser)950 camel_mime_parser_errno (CamelMimeParser *parser)
951 {
952 	struct _header_scan_state *s = _PRIVATE (parser);
953 
954 	return s->ioerrno;
955 }
956 
957 /* ********************************************************************** */
958 /*    Implementation							  */
959 /* ********************************************************************** */
960 
961 /* read the next bit of data, ensure there is enough room 'atleast' bytes */
962 static gint
folder_read(struct _header_scan_state * s)963 folder_read (struct _header_scan_state *s)
964 {
965 	gint len;
966 	gint inoffset;
967 
968 	if (s->inptr < s->inend - s->atleast || s->eof)
969 		return s->inend - s->inptr;
970 #ifdef PURIFY
971 	purify_watch_remove (inend_id);
972 	purify_watch_remove (inbuffer_id);
973 #endif
974 	/* check for any remaning bytes (under the atleast limit( */
975 	inoffset = s->inend - s->inptr;
976 	if (inoffset > 0) {
977 		memmove (s->inbuf, s->inptr, inoffset);
978 	}
979 	if (s->stream) {
980 		len = camel_stream_read (
981 			s->stream, s->inbuf + inoffset, SCAN_BUF - inoffset, NULL, NULL);
982 	} else if (s->input_stream != NULL) {
983 		len = g_input_stream_read (
984 			s->input_stream, s->inbuf + inoffset,
985 			SCAN_BUF - inoffset, NULL, NULL);
986 	} else {
987 		len = read (s->fd, s->inbuf + inoffset, SCAN_BUF - inoffset);
988 	}
989 	r (printf ("read %d bytes, offset = %d\n", len, inoffset));
990 	if (len >= 0) {
991 		/* add on the last read block */
992 		s->seek += s->inptr - s->inbuf;
993 		s->inptr = s->inbuf;
994 		s->inend = s->inbuf + len + inoffset;
995 		s->eof = (len == 0);
996 		r (printf ("content = %d '%.*s'\n",s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
997 	} else {
998 		s->ioerrno = errno ? errno : EIO;
999 	}
1000 
1001 	g_return_val_if_fail (s->inptr <= s->inend, 0);
1002 #ifdef PURIFY
1003 	inend_id = purify_watch (&s->inend);
1004 	inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
1005 #endif
1006 	r (printf ("content = %d '%.*s'\n", s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
1007 	/* set a sentinal, for the inner loops to check against */
1008 	s->inend[0] = '\n';
1009 	return s->inend - s->inptr;
1010 }
1011 
1012 /* return the current absolute position of the data pointer */
1013 static goffset
folder_tell(struct _header_scan_state * s)1014 folder_tell (struct _header_scan_state *s)
1015 {
1016 	return s->seek + (s->inptr - s->inbuf);
1017 }
1018 
1019 /*
1020  * need some way to prime the parser state, so this actually works for
1021  * other than top-level messages
1022  */
1023 static goffset
folder_seek(struct _header_scan_state * s,goffset offset,gint whence)1024 folder_seek (struct _header_scan_state *s,
1025              goffset offset,
1026              gint whence)
1027 {
1028 	goffset newoffset;
1029 
1030 	if (s->stream) {
1031 		if (G_IS_SEEKABLE (s->stream)) {
1032 			/* NOTE: assumes whence seekable stream == whence libc, which is probably
1033 			 * the case (or bloody well should've been) */
1034 			g_seekable_seek (
1035 				G_SEEKABLE (s->stream),
1036 				offset, whence, NULL, NULL);
1037 			newoffset = g_seekable_tell (G_SEEKABLE (s->stream));
1038 		} else {
1039 			newoffset = -1;
1040 			errno = EINVAL;
1041 		}
1042 	} else if (s->input_stream != NULL) {
1043 		if (G_IS_SEEKABLE (s->input_stream)) {
1044 			/* NOTE: assumes whence seekable stream == whence libc, which is probably
1045 			 * the case (or bloody well should've been) */
1046 			g_seekable_seek (
1047 				G_SEEKABLE (s->input_stream),
1048 				offset, whence, NULL, NULL);
1049 			newoffset = g_seekable_tell (G_SEEKABLE (s->input_stream));
1050 		} else {
1051 			newoffset = -1;
1052 			errno = EINVAL;
1053 		}
1054 	} else {
1055 		newoffset = lseek (s->fd, offset, whence);
1056 	}
1057 #ifdef PURIFY
1058 	purify_watch_remove (inend_id);
1059 	purify_watch_remove (inbuffer_id);
1060 #endif
1061 	if (newoffset != -1) {
1062 		s->seek = newoffset;
1063 		s->inptr = s->inbuf;
1064 		s->inend = s->inbuf;
1065 		s->eof = FALSE;
1066 	} else {
1067 		s->ioerrno = errno ? errno : EIO;
1068 	}
1069 #ifdef PURIFY
1070 	inend_id = purify_watch (&s->inend);
1071 	inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
1072 #endif
1073 	return newoffset;
1074 }
1075 
1076 static void
folder_push_part(struct _header_scan_state * s,struct _header_scan_stack * h)1077 folder_push_part (struct _header_scan_state *s,
1078                   struct _header_scan_stack *h)
1079 {
1080 	if (s->parts && s->parts->atleast > h->boundarylenfinal)
1081 		h->atleast = s->parts->atleast;
1082 	else
1083 		h->atleast = MAX (h->boundarylenfinal, 1);
1084 
1085 	h->parent = s->parts;
1086 	s->parts = h;
1087 }
1088 
1089 static void
folder_scan_stack_free(struct _header_scan_stack * h)1090 folder_scan_stack_free (struct _header_scan_stack *h)
1091 {
1092 	if (h) {
1093 		g_free (h->boundary);
1094 #ifdef MEMPOOL
1095 		camel_mempool_destroy (h->pool);
1096 #else
1097 		camel_header_raw_clear (&h->headers);
1098 #endif
1099 		camel_content_type_unref (h->content_type);
1100 		if (h->pretext)
1101 			g_byte_array_free (h->pretext, TRUE);
1102 		if (h->posttext)
1103 			g_byte_array_free (h->posttext, TRUE);
1104 		if (h->from_line)
1105 			g_byte_array_free (h->from_line, TRUE);
1106 		g_free (h);
1107 	}
1108 }
1109 
1110 static void
folder_pull_part(struct _header_scan_state * s)1111 folder_pull_part (struct _header_scan_state *s)
1112 {
1113 	struct _header_scan_stack *h;
1114 
1115 	h = s->parts;
1116 	if (h) {
1117 		s->parts = h->parent;
1118 
1119 		folder_scan_stack_free (h);
1120 	} else {
1121 		g_warning ("Header stack underflow!\n");
1122 	}
1123 }
1124 
1125 static gint
folder_scan_skip_line(struct _header_scan_state * s,GByteArray * save)1126 folder_scan_skip_line (struct _header_scan_state *s,
1127                        GByteArray *save)
1128 {
1129 	gint atleast = s->atleast;
1130 	register gchar *inptr, *inend, c;
1131 	gint len;
1132 
1133 	s->atleast = 1;
1134 
1135 	d (printf ("skipping line\n"));
1136 
1137 	while ( (len = folder_read (s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
1138 		inptr = s->inptr;
1139 		inend = s->inend;
1140 
1141 		c = -1;
1142 		while (inptr < inend
1143 		       && (c = *inptr++) != '\n') {
1144 			d (printf ("(%2x,%c)", c, isprint (c) ? c : '.'));
1145 			;
1146 		}
1147 
1148 		if (save)
1149 			g_byte_array_append (save, (guint8 *) s->inptr, inptr - s->inptr);
1150 
1151 		s->inptr = inptr;
1152 
1153 		if (c == '\n') {
1154 			s->atleast = atleast;
1155 			return 0;
1156 		}
1157 	}
1158 
1159 	d (printf ("couldn't find end of line?\n"));
1160 
1161 	s->atleast = atleast;
1162 
1163 	return -1;		/* not found */
1164 }
1165 
1166 /* TODO: Is there any way to make this run faster?  It gets called a lot ... */
1167 static struct _header_scan_stack *
folder_boundary_check(struct _header_scan_state * s,const gchar * boundary,gint * lastone)1168 folder_boundary_check (struct _header_scan_state *s,
1169                        const gchar *boundary,
1170                        gint *lastone)
1171 {
1172 	struct _header_scan_stack *part;
1173 	gint len = s->inend - boundary; /* make sure we dont access past the buffer */
1174 
1175 	h (printf ("checking boundary marker upto %d bytes\n", len));
1176 	part = s->parts;
1177 	while (part) {
1178 		h (printf ("  boundary: %s\n", part->boundary));
1179 		h (printf ("   against: '%.*s'\n", part->boundarylen, boundary));
1180 		if (part->boundary
1181 		    && part->boundarylen <= len
1182 		    && memcmp (boundary, part->boundary, part->boundarylen) == 0) {
1183 			h (printf ("matched boundary: %s\n", part->boundary));
1184 			/* again, make sure we're in range */
1185 			if (part->boundarylenfinal <= len) {
1186 				gint extra = part->boundarylenfinal - part->boundarylen;
1187 
1188 				/* check the extra stuff on a final boundary, normally -- for mime parts */
1189 				if (extra > 0) {
1190 					*lastone = memcmp(&boundary[part->boundarylen],
1191 							  &part->boundary[part->boundarylen],
1192 							  extra) == 0;
1193 				} else {
1194 					*lastone = TRUE;
1195 				}
1196 				h (printf ("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
1197 			} else {
1198 				h (printf ("not enough room to check last one?\n"));
1199 				*lastone = FALSE;
1200 			}
1201 			/*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/
1202 			return part;
1203 		}
1204 		part = part->parent;
1205 	}
1206 	return NULL;
1207 }
1208 
#ifdef MEMPOOL
/* Split the NUL-terminated "Name:value" text at @header, which ends at
 * s->outptr, at its first ':' and append the pair, tagged with @offset,
 * to the end of @h's header list.  Storage comes from @h's memory pool.
 * Text without a ':' is ignored. */
static void
header_append_mempool (struct _header_scan_state *s,
                       struct _header_scan_stack *h,
                       gchar *header,
                       gint offset)
{
	CamelHeaderRaw *node, **tail;
	gchar *colon, *value;
	gint name_len, value_len;

	colon = strchr (header, ':');
	if (colon == NULL)
		return;

	node = camel_mempool_alloc (h->pool, sizeof (*node));
	node->next = NULL;

	/* everything before the colon is the name */
	name_len = colon - header;
	node->name = camel_mempool_alloc (h->pool, name_len + 1);
	memcpy (node->name, header, name_len);
	node->name[name_len] = 0;

	/* everything after it, up to the output pointer, is the value */
	value = colon + 1;
	value_len = s->outptr - value;
	node->value = camel_mempool_alloc (h->pool, value_len + 1);
	memcpy (node->value, value, value_len);
	node->value[value_len] = 0;

	node->offset = offset;

	/* walk to the end of the list and hook the new node in */
	for (tail = &h->headers; *tail != NULL; tail = &(*tail)->next)
		;
	*tail = node;
}

#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))

#endif
1251 
/* Copy the string start->inptr into the header buffer (s->outbuf),
 * grow if necessary
 * remove trailing \r chars (\n's assumed already removed)
 * and track the start offset of the header */
/* Basically an optimised version of g_byte_array_append() */
/* Wrapped in do { } while (0) so it acts as a single statement, with
 * every argument parenthesised.  The write offset is taken before the
 * buffer is reallocated, so no arithmetic is done on the old pointer
 * afterwards.  Growing always leaves at least one spare byte past the
 * copied data. */
#define header_append(s, start, inptr) \
do { \
	register gintptr headerlen = (inptr) - (start); \
 \
	if (headerlen > 0) { \
		if (headerlen >= ((s)->outend - (s)->outptr)) { \
			register gchar *outnew; \
			register gintptr olen = (((s)->outend - (s)->outbuf) + headerlen) * 2 + 1; \
			register gintptr oused = (s)->outptr - (s)->outbuf; \
			outnew = g_realloc ((s)->outbuf, olen); \
			(s)->outptr = outnew + oused; \
			(s)->outbuf = outnew; \
			(s)->outend = outnew + olen; \
		} \
		if ((start)[headerlen - 1] == '\r') \
			headerlen--; \
		memcpy ((s)->outptr, (start), headerlen); \
		(s)->outptr += headerlen; \
	} \
	if ((s)->header_start == -1) \
		(s)->header_start = ((start) - (s)->inbuf) + (s)->seek; \
} while (0)
1278 
1279 static struct _header_scan_stack *
folder_scan_header(struct _header_scan_state * s,gint * lastone)1280 folder_scan_header (struct _header_scan_state *s,
1281                     gint *lastone)
1282 {
1283 	gint atleast = s->atleast, newatleast;
1284 	gchar *start = NULL;
1285 	gint len;
1286 	struct _header_scan_stack *h;
1287 	gchar *inend;
1288 	register gchar *inptr;
1289 
1290 	h (printf ("scanning first bit\n"));
1291 
1292 	h = g_malloc0 (sizeof (*h));
1293 #ifdef MEMPOOL
1294 	h->pool = camel_mempool_new (8192, 4096, CAMEL_MEMPOOL_ALIGN_STRUCT);
1295 #endif
1296 
1297 	if (s->parts)
1298 		newatleast = s->parts->atleast;
1299 	else
1300 		newatleast = 1;
1301 	*lastone = FALSE;
1302 
1303 	do {
1304 		s->atleast = newatleast;
1305 
1306 		h (printf ("atleast = %d\n", s->atleast));
1307 
1308 		while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
1309 			inptr = s->inptr;
1310 			inend = s->inend - s->atleast + 1;
1311 
1312 			while (inptr < inend) {
1313 				start = inptr;
1314 				if (!s->midline) {
1315 					if (folder_boundary_check (s, inptr, lastone)) {
1316 						if ((s->outptr > s->outbuf))
1317 							goto header_truncated; /* may not actually be truncated */
1318 
1319 						goto header_done;
1320 					}
1321 				} else if (s->check_header_folded) {
1322 					if (*inptr != ' ' && *inptr != '\t') {
1323 						s->outptr[0] = 0;
1324 
1325 						/* The outbuf can contain an extra \r\n, thus remove it */
1326 						if (s->outptr > s->outbuf && (s->outptr[-1] == '\r' || s->outptr[-1] == '\n'))
1327 							s->outptr[-1] = 0;
1328 
1329 						if (s->outptr - 1 > s->outbuf && (s->outptr[-2] == '\r' || s->outptr[-2] == '\n'))
1330 							s->outptr[-2] = 0;
1331 
1332 						if (s->outbuf == s->outptr || !*s->outbuf) {
1333 							s->check_header_folded = FALSE;
1334 							s->midline = FALSE;
1335 							goto header_done;
1336 						}
1337 
1338 						h (printf ("header not folded '%s' at %d\n", s->outbuf, (gint) s->header_start));
1339 
1340 						header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1341 						s->outptr = s->outbuf;
1342 						s->header_start = -1;
1343 					}
1344 				}
1345 
1346 				/* goto next line/sentinal */
1347 				while ((*inptr++) != '\n')
1348 					;
1349 
1350 				if (inptr > s->inend + 1) {
1351 					g_warn_if_fail (inptr <= s->inend + 1);
1352 					folder_scan_stack_free (h);
1353 					return NULL;
1354 				}
1355 
1356 				/* check for sentinal or real end of line */
1357 				if (inptr >= inend) {
1358 					h (printf ("not at end of line yet, going further\n"));
1359 					/* didn't find end of line within our allowed area */
1360 					s->midline = TRUE;
1361 					s->check_header_folded = inptr == inend;
1362 					inptr = inend;
1363 					header_append (s, start, inptr + (s->check_header_folded ? -1 : 0));
1364 				} else {
1365 					h (printf ("got line part: '%.*s'\n", (gint) (inptr - 1 - start), start));
1366 					/* got a line, strip and add it, process it */
1367 					s->midline = FALSE;
1368 					s->check_header_folded = FALSE;
1369 					header_append (s, start, inptr - 1);
1370 
1371 					/* check for end of headers */
1372 					if (s->outbuf == s->outptr)
1373 						goto header_done;
1374 
1375 					/* check for continuation/compress headers, we have atleast 1 gchar here to work with */
1376 					if (inptr[0] ==  ' ' || inptr[0] == '\t') {
1377 						h (printf ("continuation\n"));
1378 
1379 #ifdef PRESERVE_HEADERS
1380 						if (inptr - 1 >= start) {
1381 							start = inptr - 1;
1382 							header_append (s, start, inptr);
1383 						}
1384 #endif
1385 #ifndef PRESERVE_HEADERS
1386 						/* TODO: this wont catch multiple space continuation across a read boundary, but
1387 						 * that is assumed rare, and not fatal anyway */
1388 						do
1389 							inptr++;
1390 						while (*inptr == ' ' || *inptr == '\t');
1391 						inptr--;
1392 						*inptr = ' ';
1393 #endif
1394 					} else {
1395 						/* otherwise, complete header, add it */
1396 						s->outptr[0] = 0;
1397 
1398 						h (printf ("header '%s' at %d\n", s->outbuf, (gint) s->header_start));
1399 
1400 						header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1401 						s->outptr = s->outbuf;
1402 						s->header_start = -1;
1403 					}
1404 				}
1405 			}
1406 			s->inptr = inptr;
1407 		}
1408 		h (printf ("end of file?  read %d bytes\n", len));
1409 		newatleast = 1;
1410 	} while (s->atleast > 1);
1411 
1412 	if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
1413 		start = s->inptr;
1414 		inptr = s->inend;
1415 		if (inptr > start) {
1416 			if (inptr[-1] == '\n')
1417 				inptr--;
1418 		}
1419 		goto header_truncated;
1420 	}
1421 
1422 	s->atleast = atleast;
1423 
1424 	return h;
1425 
1426 header_truncated:
1427 	header_append (s, start, inptr);
1428 
1429 	s->outptr[0] = 0;
1430 
1431 	if (s->check_header_folded && s->midline) {
1432 		/* The outbuf can contain an extra \r\n, thus remove it */
1433 		if (s->outptr > s->outbuf && (s->outptr[-1] == '\r' || s->outptr[-1] == '\n'))
1434 			s->outptr[-1] = 0;
1435 
1436 		if (s->outptr - 1 > s->outbuf && (s->outptr[-2] == '\r' || s->outptr[-2] == '\n'))
1437 			s->outptr[-2] = 0;
1438 	}
1439 
1440 	if (s->outbuf == s->outptr)
1441 		goto header_done;
1442 
1443 	header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1444 
1445 	s->outptr = s->outbuf;
1446 header_done:
1447 	s->inptr = inptr;
1448 	s->atleast = atleast;
1449 	s->header_start = -1;
1450 	return h;
1451 }
1452 
1453 static struct _header_scan_stack *
folder_scan_content(struct _header_scan_state * s,gint * lastone,gchar ** data,gsize * length)1454 folder_scan_content (struct _header_scan_state *s,
1455                      gint *lastone,
1456                      gchar **data,
1457                      gsize *length)
1458 {
1459 	gint atleast = s->atleast, newatleast;
1460 	register gchar *inptr;
1461 	gchar *inend;
1462 	gchar *start;
1463 	gint len;
1464 	struct _header_scan_stack *part;
1465 	gint onboundary = FALSE;
1466 
1467 	c (printf ("scanning content\n"));
1468 
1469 	part = s->parts;
1470 	if (part)
1471 		newatleast = part->atleast;
1472 	else
1473 		newatleast = 1;
1474 	*lastone = FALSE;
1475 
1476 	c (printf ("atleast = %d\n", newatleast));
1477 
1478 	do {
1479 		s->atleast = newatleast;
1480 
1481 		while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
1482 			inptr = s->inptr;
1483 			if (s->eof)
1484 				inend = s->inend;
1485 			else
1486 				inend = s->inend - s->atleast + 1;
1487 			start = inptr;
1488 
1489 			c (printf ("inptr = %p, inend = %p\n", inptr, inend));
1490 
1491 			while (inptr < inend) {
1492 				if (!s->midline
1493 				    && (part = folder_boundary_check (s, inptr, lastone))) {
1494 					onboundary = TRUE;
1495 
1496 					/* since we truncate the boundary data, we need at least 1 gchar here spare,
1497 					 * to remain in the same state */
1498 					if ( (inptr - start) > 1)
1499 						goto content;
1500 
1501 					/* otherwise, jump to the state of the boundary we actually found */
1502 					goto normal_exit;
1503 				}
1504 
1505 				/* goto the next line */
1506 				while ((*inptr++) != '\n')
1507 					;
1508 
1509 				/* check the sentinal, if we went past the atleast limit, and reset it to there */
1510 				if (inptr > inend) {
1511 					s->midline = TRUE;
1512 					inptr = inend;
1513 				} else {
1514 					s->midline = FALSE;
1515 				}
1516 			}
1517 
1518 			goto content;
1519 		}
1520 		newatleast = 1;
1521 	} while (s->atleast > 1);
1522 
1523 	c (printf ("length read = %d\n", len));
1524 
1525 	if (s->inend > s->inptr) {
1526 		start = s->inptr;
1527 		inptr = s->inend;
1528 		goto content;
1529 	}
1530 
1531 	*length = 0;
1532 	*data = s->inptr;
1533 	s->atleast = atleast;
1534 	return NULL;
1535 
1536 content:
1537 	/* treat eof as the last boundary in From mode */
1538 	if (s->scan_from && s->eof && s->atleast <= 1) {
1539 		onboundary = TRUE;
1540 		part = NULL;
1541 	} else {
1542 		part = s->parts;
1543 	}
1544 normal_exit:
1545 	s->atleast = atleast;
1546 	s->inptr = inptr;
1547 
1548 	*data = start;
1549 	/* if we hit a boundary, we should not include the closing \n */
1550 	if (onboundary && (inptr - start) > 0)
1551 		*length = inptr-start-1;
1552 	else
1553 		*length = inptr-start;
1554 
1555 	/*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/
1556 
1557 	return part;
1558 }
1559 
1560 static void
folder_scan_close(struct _header_scan_state * s)1561 folder_scan_close (struct _header_scan_state *s)
1562 {
1563 	g_free (s->realbuf);
1564 	g_free (s->outbuf);
1565 	while (s->parts)
1566 		folder_pull_part (s);
1567 	if (s->fd != -1)
1568 		close (s->fd);
1569 	g_clear_object (&s->stream);
1570 	g_clear_object (&s->input_stream);
1571 	g_free (s);
1572 }
1573 
1574 static struct _header_scan_state *
folder_scan_init(void)1575 folder_scan_init (void)
1576 {
1577 	struct _header_scan_state *s;
1578 
1579 	s = g_malloc (sizeof (*s));
1580 
1581 	s->fd = -1;
1582 	s->stream = NULL;
1583 	s->input_stream = NULL;
1584 	s->ioerrno = 0;
1585 
1586 	s->outbuf = g_malloc (1024);
1587 	s->outptr = s->outbuf;
1588 	s->outend = s->outbuf + 1024;
1589 
1590 	s->realbuf = g_malloc0 (SCAN_BUF + SCAN_HEAD * 2);
1591 	s->inbuf = s->realbuf + SCAN_HEAD;
1592 	s->inptr = s->inbuf;
1593 	s->inend = s->inbuf;
1594 	s->atleast = 0;
1595 
1596 	s->seek = 0;		/* current character position in file of the last read block */
1597 	s->unstep = 0;
1598 
1599 	s->header_start = -1;
1600 
1601 	s->start_of_from = -1;
1602 	s->start_of_headers = -1;
1603 	s->start_of_boundary = -1;
1604 
1605 	s->midline = FALSE;
1606 	s->check_header_folded = FALSE;
1607 	s->scan_from = FALSE;
1608 	s->scan_pre_from = FALSE;
1609 	s->eof = FALSE;
1610 
1611 	s->filters = NULL;
1612 	s->filterid = 1;
1613 
1614 	s->parts = NULL;
1615 
1616 	s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1617 	return s;
1618 }
1619 
1620 static void
drop_states(struct _header_scan_state * s)1621 drop_states (struct _header_scan_state *s)
1622 {
1623 	while (s->parts) {
1624 		folder_scan_drop_step (s);
1625 	}
1626 	s->unstep = 0;
1627 	s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1628 }
1629 
1630 static void
folder_scan_reset(struct _header_scan_state * s)1631 folder_scan_reset (struct _header_scan_state *s)
1632 {
1633 	drop_states (s);
1634 	s->inend = s->inbuf;
1635 	s->inptr = s->inbuf;
1636 	s->inend[0] = '\n';
1637 	if (s->fd != -1) {
1638 		close (s->fd);
1639 		s->fd = -1;
1640 	}
1641 	g_clear_object (&s->stream);
1642 	g_clear_object (&s->input_stream);
1643 	s->ioerrno = 0;
1644 	s->eof = FALSE;
1645 }
1646 
1647 static gint
folder_scan_init_with_fd(struct _header_scan_state * s,gint fd)1648 folder_scan_init_with_fd (struct _header_scan_state *s,
1649 			  gint fd)
1650 {
1651 	folder_scan_reset (s);
1652 	s->fd = fd;
1653 
1654 	return 0;
1655 }
1656 
1657 static gint
folder_scan_init_with_stream(struct _header_scan_state * s,CamelStream * stream,GError ** error)1658 folder_scan_init_with_stream (struct _header_scan_state *s,
1659                               CamelStream *stream,
1660                               GError **error)
1661 {
1662 	folder_scan_reset (s);
1663 	s->stream = g_object_ref (stream);
1664 
1665 	return 0;
1666 }
1667 
1668 static gboolean
part_is_encoded(CamelHeaderRaw ** headers)1669 part_is_encoded (CamelHeaderRaw **headers)
1670 {
1671 	const gchar *encoding;
1672 
1673 	encoding = header_raw_find (headers, "Content-Transfer-Encoding", NULL);
1674 
1675 	if (!encoding || !*encoding)
1676 		return FALSE;
1677 
1678 	if (*encoding == ' ' || *encoding == '\t')
1679 		encoding++;
1680 
1681 	switch (camel_transfer_encoding_from_string (encoding)) {
1682 	case CAMEL_TRANSFER_ENCODING_BASE64:
1683 	case CAMEL_TRANSFER_ENCODING_QUOTEDPRINTABLE:
1684 	case CAMEL_TRANSFER_ENCODING_UUENCODE:
1685 		return TRUE;
1686 	default:
1687 		break;
1688 	}
1689 
1690 	return FALSE;
1691 }
1692 
1693 #define USE_FROM
1694 
1695 static void
folder_scan_step(struct _header_scan_state * s,gchar ** databuffer,gsize * datalength)1696 folder_scan_step (struct _header_scan_state *s,
1697                   gchar **databuffer,
1698                   gsize *datalength)
1699 {
1700 	struct _header_scan_stack *h, *hb;
1701 	const gchar *content;
1702 	const gchar *bound;
1703 	gint type, state, seenlast;
1704 	CamelContentType *ct = NULL;
1705 	struct _header_scan_filter *f;
1706 	gsize presize;
1707 	gulong boundary_len;
1708 
1709 /*	printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
1710 
1711 tail_recurse:
1712 	d ({
1713 		printf ("\nSCAN STACK:\n");
1714 		printf (" '%s' :\n", states[s->state]);
1715 		hb = s->parts;
1716 		while (hb) {
1717 			printf ("  '%s' : %s ", states[hb->savestate], hb->boundary);
1718 			if (hb->content_type) {
1719 				printf ("(%s/%s)", hb->content_type->type, hb->content_type->subtype);
1720 			} else {
1721 				printf ("(default)");
1722 			}
1723 			printf ("\n");
1724 			hb = hb->parent;
1725 		}
1726 		printf ("\n");
1727 	});
1728 
1729 	switch (s->state) {
1730 
1731 #ifdef USE_FROM
1732 	case CAMEL_MIME_PARSER_STATE_INITIAL:
1733 		if (s->scan_from) {
1734 			h = g_malloc0 (sizeof (*h));
1735 			h->boundary = g_strdup ("From ");
1736 			h->boundarylen = strlen (h->boundary);
1737 			h->boundarylenfinal = h->boundarylen;
1738 			h->from_line = g_byte_array_new ();
1739 			folder_push_part (s, h);
1740 			s->state = CAMEL_MIME_PARSER_STATE_PRE_FROM;
1741 			goto scan_pre_from;
1742 		} else {
1743 			s->start_of_from = -1;
1744 			goto scan_header;
1745 		}
1746 
1747 	case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1748 
1749  scan_pre_from:
1750 		h = s->parts;
1751 		do {
1752 			hb = folder_scan_content (s, &state, databuffer, datalength);
1753 			if (s->scan_pre_from && *datalength > 0) {
1754 				d (printf ("got pre-from content %d bytes\n", *datalength));
1755 				return;
1756 			}
1757 		} while (hb == h && *datalength > 0);
1758 
1759 		if (*datalength == 0 && hb == h) {
1760 			d (printf ("found 'From '\n"));
1761 			s->start_of_from = folder_tell (s);
1762 			folder_scan_skip_line (s, h->from_line);
1763 			h->savestate = CAMEL_MIME_PARSER_STATE_INITIAL;
1764 			s->state = CAMEL_MIME_PARSER_STATE_FROM;
1765 		} else {
1766 			folder_pull_part (s);
1767 			s->state = CAMEL_MIME_PARSER_STATE_EOF;
1768 		}
1769 		return;
1770 #else
1771 	case CAMEL_MIME_PARSER_STATE_INITIAL:
1772 	case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1773 #endif /* USE_FROM */
1774 
1775 	scan_header:
1776 	case CAMEL_MIME_PARSER_STATE_FROM:
1777 		s->start_of_headers = folder_tell (s);
1778 		h = folder_scan_header (s, &state);
1779 #ifdef USE_FROM
1780 		if (s->scan_from)
1781 			h->savestate = CAMEL_MIME_PARSER_STATE_FROM_END;
1782 		else
1783 #endif
1784 			h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
1785 
1786 		/* FIXME: should this check for MIME-Version: 1.0 as well? */
1787 
1788 		type = CAMEL_MIME_PARSER_STATE_HEADER;
1789 		if ((content = header_raw_find (&h->headers, "Content-Type", NULL))
1790 		     && (ct = camel_content_type_decode (content))) {
1791 			if (!g_ascii_strcasecmp (ct->type, "multipart")) {
1792 				if (!camel_content_type_is (ct, "multipart", "signed")
1793 				    && (bound = camel_content_type_param (ct, "boundary"))) {
1794 					d (printf ("multipart, boundary = %s\n", bound));
1795 					h->boundarylen = strlen (bound) + 2;
1796 					h->boundarylenfinal = h->boundarylen + 2;
1797 					boundary_len = h->boundarylen + 3;
1798 					h->boundary = g_malloc (boundary_len);
1799 					g_snprintf (h->boundary, boundary_len, "--%s--", bound);
1800 					type = CAMEL_MIME_PARSER_STATE_MULTIPART;
1801 				} else {
1802 					/*camel_content_type_unref(ct);
1803 					  ct = camel_content_type_decode ("text/plain");*/
1804 /* We can't quite do this, as it will mess up all the offsets ... */
1805 /*					camel_header_raw_replace(&h->headers, "Content-Type", "text/plain", offset); */
1806 					/*g_warning("Multipart with no boundary, treating as text/plain");*/
1807 				}
1808 			} else if (!g_ascii_strcasecmp (ct->type, "message")) {
1809 				if ((!g_ascii_strcasecmp (ct->subtype, "rfc2822") ||
1810 				    !g_ascii_strcasecmp (ct->subtype, "rfc822") ||
1811 				    !g_ascii_strcasecmp (ct->subtype, "global") ||
1812 				    !g_ascii_strcasecmp (ct->subtype, "news")) &&
1813 				    !part_is_encoded (&h->headers)) {
1814 					type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1815 				}
1816 			}
1817 		} else {
1818 			/* make the default type for multipart/digest be message/rfc822 */
1819 			if (s->parts &&
1820 			    camel_content_type_is (s->parts->content_type, "multipart", "digest") &&
1821 			    !part_is_encoded (&h->headers)) {
1822 				ct = camel_content_type_decode ("message/rfc822");
1823 				type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1824 				d (printf ("parent was multipart/digest, autoupgrading to message/rfc822?\n"));
1825 				/* maybe we should do this too?
1826 				 * header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/
1827 			} else {
1828 				ct = camel_content_type_decode ("text/plain");
1829 			}
1830 		}
1831 		h->content_type = ct;
1832 		folder_push_part (s, h);
1833 		s->state = type;
1834 		return;
1835 
1836 	case CAMEL_MIME_PARSER_STATE_HEADER:
1837 		s->state = CAMEL_MIME_PARSER_STATE_BODY;
1838 		/* coverity[fallthrough] */
1839 		/* falls through */
1840 
1841 	case CAMEL_MIME_PARSER_STATE_BODY:
1842 		h = s->parts;
1843 		*datalength = 0;
1844 		presize = SCAN_HEAD;
1845 		f = s->filters;
1846 
1847 		do {
1848 			hb = folder_scan_content (s, &state, databuffer, datalength);
1849 
1850 			d (printf ("\n\nOriginal content: '"));
1851 			d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
1852 			d (printf ("'\n"));
1853 
1854 			if (*datalength > 0) {
1855 				while (f) {
1856 					camel_mime_filter_filter (
1857 						f->filter,
1858 						*databuffer, *datalength, presize,
1859 						databuffer, datalength, &presize);
1860 					d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
1861 					d (printf ("'\n"));
1862 					f = f->next;
1863 				}
1864 				return;
1865 			}
1866 		} while (hb == h && *datalength > 0);
1867 
1868 		/* check for any filter completion data */
1869 		while (f) {
1870 			camel_mime_filter_complete (
1871 				f->filter, *databuffer, *datalength, presize,
1872 				databuffer, datalength, &presize);
1873 			f = f->next;
1874 		}
1875 
1876 		if (*datalength > 0)
1877 			return;
1878 
1879 		s->state = CAMEL_MIME_PARSER_STATE_BODY_END;
1880 		break;
1881 
1882 	case CAMEL_MIME_PARSER_STATE_MULTIPART:
1883 		h = s->parts;
1884 		/* This mess looks for the next boundary on this
1885 		 * level.  Once it finds the last one, it keeps going,
1886 		 * looking for post-multipart content ('postface').
1887 		 * Because messages might have duplicate boundaries for
1888 		 * different parts, it makes sure it stops if its already
1889 		 * found an end boundary for this part.  It handles
1890 		 * truncated and missing boundaries appropriately too. */
1891 		seenlast = FALSE;
1892 		do {
1893 			do {
1894 				hb = folder_scan_content (s, &state, databuffer, datalength);
1895 				if (*datalength > 0) {
1896 					/* instead of a new state, we'll just store it locally and provide
1897 					 * an accessor function */
1898 					d (printf (
1899 						"Multipart %s Content %p: '%.*s'\n",
1900 						h->prestage > 0 ? "post" : "pre",
1901 						h, *datalength, *databuffer));
1902 					if (h->prestage > 0) {
1903 						if (h->posttext == NULL)
1904 							h->posttext = g_byte_array_new ();
1905 						g_byte_array_append (h->posttext, (guint8 *) *databuffer, *datalength);
1906 					} else {
1907 						if (h->pretext == NULL)
1908 							h->pretext = g_byte_array_new ();
1909 						g_byte_array_append (h->pretext, (guint8 *) *databuffer, *datalength);
1910 					}
1911 				}
1912 			} while (hb == h && *datalength > 0);
1913 			h->prestage++;
1914 			if (*datalength == 0 && hb == h && !seenlast) {
1915 				d (printf ("got boundary: %s last=%d\n", hb->boundary, state));
1916 				s->start_of_boundary = folder_tell (s);
1917 				folder_scan_skip_line (s, NULL);
1918 				if (!state) {
1919 					s->state = CAMEL_MIME_PARSER_STATE_FROM;
1920 					folder_scan_step (s, databuffer, datalength);
1921 					s->parts->savestate = CAMEL_MIME_PARSER_STATE_MULTIPART; /* set return state for the new head part */
1922 					return;
1923 				} else
1924 					seenlast = TRUE;
1925 			} else {
1926 				break;
1927 			}
1928 		} while (1);
1929 
1930 		s->state = CAMEL_MIME_PARSER_STATE_MULTIPART_END;
1931 		break;
1932 
1933 	case CAMEL_MIME_PARSER_STATE_MESSAGE:
1934 		s->state = CAMEL_MIME_PARSER_STATE_FROM;
1935 		folder_scan_step (s, databuffer, datalength);
1936 		s->parts->savestate = CAMEL_MIME_PARSER_STATE_MESSAGE_END;
1937 		break;
1938 
1939 	case CAMEL_MIME_PARSER_STATE_FROM_END:
1940 	case CAMEL_MIME_PARSER_STATE_BODY_END:
1941 	case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1942 	case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1943 		s->state = s->parts->savestate;
1944 		folder_pull_part (s);
1945 		if (s->state & CAMEL_MIME_PARSER_STATE_END)
1946 			return;
1947 		goto tail_recurse;
1948 
1949 	case CAMEL_MIME_PARSER_STATE_EOF:
1950 		return;
1951 
1952 	default:
1953 		g_warning ("Invalid state in camel-mime-parser: %u", s->state);
1954 		break;
1955 	}
1956 
1957 	return;
1958 }
1959 
1960 /* drops the current state back one */
1961 static void
folder_scan_drop_step(struct _header_scan_state * s)1962 folder_scan_drop_step (struct _header_scan_state *s)
1963 {
1964 	switch (s->state) {
1965 	case CAMEL_MIME_PARSER_STATE_EOF:
1966 		s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1967 	case CAMEL_MIME_PARSER_STATE_INITIAL:
1968 		return;
1969 
1970 	case CAMEL_MIME_PARSER_STATE_FROM:
1971 	case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1972 		s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1973 		folder_pull_part (s);
1974 		return;
1975 
1976 	case CAMEL_MIME_PARSER_STATE_MESSAGE:
1977 	case CAMEL_MIME_PARSER_STATE_HEADER:
1978 	case CAMEL_MIME_PARSER_STATE_MULTIPART:
1979 
1980 	case CAMEL_MIME_PARSER_STATE_FROM_END:
1981 	case CAMEL_MIME_PARSER_STATE_BODY_END:
1982 	case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1983 	case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1984 
1985 		s->state = s->parts->savestate;
1986 		folder_pull_part (s);
1987 		if (s->state & CAMEL_MIME_PARSER_STATE_END) {
1988 			s->state &= ~CAMEL_MIME_PARSER_STATE_END;
1989 		}
1990 		return;
1991 	default:
1992 		/* FIXME: not sure if this is entirely right */
1993 		break;
1994 	}
1995 }
1996 
1997 static CamelHeaderRaw *
header_raw_find_node(CamelHeaderRaw ** list,const gchar * name)1998 header_raw_find_node (CamelHeaderRaw **list,
1999                       const gchar *name)
2000 {
2001 	CamelHeaderRaw *l;
2002 
2003 	l = *list;
2004 	while (l) {
2005 		if (!g_ascii_strcasecmp (l->name, name))
2006 			break;
2007 		l = l->next;
2008 	}
2009 	return l;
2010 }
2011 
2012 static const gchar *
header_raw_find(CamelHeaderRaw ** list,const gchar * name,gint * offset)2013 header_raw_find (CamelHeaderRaw **list,
2014 		 const gchar *name,
2015 		 gint *offset)
2016 {
2017 	CamelHeaderRaw *l;
2018 
2019 	l = header_raw_find_node (list, name);
2020 	if (l) {
2021 		if (offset)
2022 			*offset = l->offset;
2023 		return l->value;
2024 	} else
2025 		return NULL;
2026 }
2027 
2028 #ifndef MEMPOOL
2029 static void
header_raw_free(CamelHeaderRaw * l)2030 header_raw_free (CamelHeaderRaw *l)
2031 {
2032 	g_free (l->name);
2033 	g_free (l->value);
2034 	g_free (l);
2035 }
2036 
2037 static void
header_raw_clear(CamelHeaderRaw ** list)2038 header_raw_clear (CamelHeaderRaw **list)
2039 {
2040 	CamelHeaderRaw *l, *n;
2041 	l = *list;
2042 	while (l) {
2043 		n = l->next;
2044 		header_raw_free (l);
2045 		l = n;
2046 	}
2047 	*list = NULL;
2048 }
2049 #endif
2050 
2051 #ifdef STANDALONE
/* Standalone test driver (only built when STANDALONE is defined).
 *
 * For every file named on the command line, opens it, runs the parser
 * over it one folder_scan_step() at a time, and prints the state returned
 * by each step together with the Content-Transfer-Encoding header and
 * the data of each body chunk.  All filter push/pull calls are compiled
 * out with #if 0; only the diagnostic output remains.
 *
 * Exits with status 1 if a file cannot be opened, otherwise returns 0. */
gint main (gint argc, gchar **argv)
{
	gint fd;
	struct _header_scan_state *s;
	gchar *data;
	gsize len;
	const gchar *name = "/tmp/evmail/Inbox";
	struct _header_scan_stack *h;	/* only referenced by the disabled code below */
	gint i;
	gint attach = 0;

	if (argc == 2)
		name = argv[1];

	/* NOTE: this only announces the name; files are opened in the loop
	 * below, which iterates over the command line arguments only. */
	printf ("opening: %s", name);

	for (i = 1; i < argc; i++) {
		/* these persist across parser steps so that the BODY_END
		 * handling can see what the preceding HEADER step found */
		const gchar *encoding = NULL, *charset = NULL;
		gchar *attachname;

		name = argv[i];
		printf ("opening: %s", name);

		fd = g_open (name, O_RDONLY | O_BINARY, 0);
		if (fd == -1) {
			perror ("Cannot open mailbox");
			exit (1);
		}
		s = folder_scan_init ();
		folder_scan_init_with_fd (s, fd);
		s->scan_from = FALSE;
#if 0
		h = g_malloc0 (sizeof (*h));
		h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
		folder_push_part (s, h);
#endif
		while (s->state != CAMEL_MIME_PARSER_STATE_EOF) {
			folder_scan_step (s, &data, &len);
			printf ("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
			switch (s->state) {
			case CAMEL_MIME_PARSER_STATE_HEADER:
				/* remember the charset only when it is something
				 * other than us-ascii */
				if (s->parts->content_type
				    && (charset = camel_content_type_param (s->parts->content_type, "charset"))) {
					if (g_ascii_strcasecmp (charset, "us-ascii")) {
#if 0
						folder_push_filter_charset (s, "UTF-8", charset);
#endif
					} else {
						charset = NULL;
					}
				} else {
					charset = NULL;
				}

				encoding = header_raw_find (&s->parts->headers, "Content-transfer-encoding", NULL);
				/* passing NULL for %s is undefined behaviour, so
				 * substitute an explicit placeholder */
				printf ("encoding = '%s'\n", encoding ? encoding : "(null)");
				/* the comparisons below expect the value to start
				 * with a single space */
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("adding base64 filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 0);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("adding quoted-printable filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 1);
#endif
				}

				break;
			case CAMEL_MIME_PARSER_STATE_BODY:
				/* %d and the %.*s precision both require an int,
				 * while len is a gsize */
				printf ("got body %d '%.*s'\n", (gint) len, (gint) len, data);
				break;
			case CAMEL_MIME_PARSER_STATE_BODY_END:
				printf ("end body %d '%.*s'\n", (gint) len, (gint) len, data);
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (charset) {
#if 0
					folder_filter_pull (s);
#endif
					charset = NULL;
				}
				encoding = NULL;
				break;
			default:
				break;
			}
		}
		folder_scan_close (s);
		close (fd);
	}
	return 0;
}
2169 
2170 #endif /* STANDALONE */
2171 
2172