1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4 *
5 * This library is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library. If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors: Michael Zucchi <notzed@ximian.com>
18 */
19
20 /* What should hopefully be a fast mail parser */
21
22 /* Do not change this code without asking me (Michael Zucchi) first
23 *
24 * There is almost always a reason something was done a certain way.
25 */
26
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32 #include <sys/types.h>
33
34 #include "camel-mempool.h"
35 #include "camel-mime-filter.h"
36 #include "camel-mime-parser.h"
37 #include "camel-mime-utils.h"
38 #include "camel-stream.h"
39
/* Debug-trace wrappers.  Each expands to nothing, so statements written as
 * r (printf (...)) or d (printf (...)) are compiled out. */
#define r(x)
#define h(x)
#define c(x)
#define d(x)

/* NOTE(review): PRESERVE_HEADERS is not referenced in this part of the
 * file; its effect must be checked where it is tested. */
#define PRESERVE_HEADERS

/*#define PURIFY*/

/* When defined, each part on the scan stack carries a CamelMemPool and
 * headers are appended through header_append_mempool(). */
#define MEMPOOL

#ifdef PURIFY
/* Watch-point ids handed back by purify_watch*() in folder_read(). */
gint inend_id = -1,
     inbuffer_id = -1;
#endif

#define SCAN_BUF 4096		/* size of read buffer */
#define SCAN_HEAD 128		/* headroom guaranteed to be before each read buffer */

/* A little hacky: the scanner state struct doubles as the GObject private
 * struct, so the tag is simply renamed by the preprocessor. */
#define _header_scan_state _CamelMimeParserPrivate
/* Fetch the private scanner state from a CamelMimeParser instance. */
#define _PRIVATE(obj) (((CamelMimeParser *)(obj))->priv)
62
/* A raw rfc822 header, kept as a singly linked list node. */
/* the value MUST be US-ASCII */
typedef struct _camel_header_raw {
	struct _camel_header_raw *next;	/* next header in the list, or NULL */
	gchar *name;			/* header name */
	gchar *value;			/* header value */
	gint offset;			/* in file, if known */
} CamelHeaderRaw;
71
/* Private scanner state of a CamelMimeParser (the tag is #defined to
 * _CamelMimeParserPrivate).  It holds the input source, the read-buffer
 * cursors, the scan flags, the filter list and the stack of parts. */
struct _header_scan_state {

	/* global state */

	CamelMimeParserState state;

	/* for building headers during scanning */
	gchar *outbuf;
	gchar *outptr;
	gchar *outend;

	/* Input sources.  folder_read() tries 'stream' first, then
	 * 'input_stream', and falls back to 'fd'. */
	gint fd;		/* input for a fd input */
	CamelStream *stream;	/* or for a stream */
	GInputStream *input_stream;

	gint ioerrno;		/* io error state */

	/* for scanning input buffers */
	gchar *realbuf;		/* the real buffer, SCAN_HEAD *2 + SCAN_BUF bytes */
	gchar *inbuf;		/* points to a subset of the allocated memory; the underflow
				 * (up to SCAN_HEAD) is for use by filters so they don't
				 * copy all data */
	gchar *inptr;		/* next unread byte in inbuf */
	gchar *inend;		/* one past the last valid byte; folder_read() stores a
				 * '\n' sentinel here */

	gint atleast;		/* folder_read() refills once fewer than this many
				 * bytes remain unread */

	goffset seek;		/* current offset to start of buffer */
	gint unstep;		/* how many states to 'unstep' (repeat the current state) */

	guint midline:1;	/* are we mid-line interrupted? */
	guint check_header_folded:1;	/* check whether header is folded first? */
	guint scan_from:1;	/* do we care about From lines? */
	guint scan_pre_from:1;	/* do we return pre-from data? */
	guint eof:1;		/* reached eof? */

	goffset start_of_from;	/* where from started */
	goffset start_of_boundary; /* where the last boundary started */
	goffset start_of_headers;  /* where headers started from the last scan */

	goffset header_start;	/* start of last header, or -1 */

	/* filters to apply to all content before output */
	gint filterid;		/* id of next filter */
	struct _header_scan_filter *filters;

	/* per message/part info */
	struct _header_scan_stack *parts;

};
120
/* One entry of the part stack: the per-message / per-part data the scanner
 * keeps while it is inside that part.  Entries are chained through 'parent'. */
struct _header_scan_stack {
	struct _header_scan_stack *parent;

	CamelMimeParserState savestate; /* state at invocation of this part */

#ifdef MEMPOOL
	CamelMemPool *pool;	/* memory pool to keep track of headers/etc at this level */
#endif
	CamelHeaderRaw *headers;	/* headers for this part */

	CamelContentType *content_type;

	/* GStrings are not used because a buffer can't be appended to them efficiently */
	GByteArray *pretext;	/* for multipart types, save the pre-boundary data here */
	GByteArray *posttext;	/* for multipart types, save the post-boundary data here */
	gint prestage;		/* used to determine if it is a pre-boundary or post-boundary data segment */

	GByteArray *from_line;	/* the from line */

	gchar *boundary;	/* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
	gint boundarylen;	/* actual length of boundary, including leading -- if there is one */
	gint boundarylenfinal;	/* length of boundary, including trailing -- if there is one */
	gint atleast;		/* the biggest boundary from here to the parent */
};
145
/* Node of the singly linked list of content filters.
 * 'next' must remain the first member: the filter add/remove code casts
 * &state->filters to a node pointer and relies on that layout. */
struct _header_scan_filter {
	struct _header_scan_filter *next;
	gint id;			/* id returned by camel_mime_parser_filter_add() */
	CamelMimeFilter *filter;	/* referenced filter object */
};
151
/* Forward declarations of the file-local scanner implementation. */
static void folder_scan_reset (struct _header_scan_state *s);
static void folder_scan_step (struct _header_scan_state *s, gchar **databuffer, gsize *datalength);
static void folder_scan_drop_step (struct _header_scan_state *s);
static gint folder_scan_init_with_fd (struct _header_scan_state *s, gint fd);
static gint folder_scan_init_with_stream (struct _header_scan_state *s, CamelStream *stream, GError **error);
static struct _header_scan_state *folder_scan_init (void);
static void folder_scan_close (struct _header_scan_state *s);
static struct _header_scan_stack *folder_scan_content (struct _header_scan_state *s, gint *lastone, gchar **data, gsize *length);
static struct _header_scan_stack *folder_scan_header (struct _header_scan_state *s, gint *lastone);
static gint folder_scan_skip_line (struct _header_scan_state *s, GByteArray *save);
static goffset folder_seek (struct _header_scan_state *s, goffset offset, gint whence);
static goffset folder_tell (struct _header_scan_state *s);
static gint folder_read (struct _header_scan_state *s);
static void folder_push_part (struct _header_scan_state *s, struct _header_scan_stack *h);

static const gchar * header_raw_find (CamelHeaderRaw **list, const gchar *name, gint *offset);

/* Header storage helpers; which set exists depends on MEMPOOL. */
#ifdef MEMPOOL
static void header_append_mempool (struct _header_scan_state *s, struct _header_scan_stack *h, gchar *header, gint offset);
#else
static void header_raw_free (CamelHeaderRaw *l);
static void header_raw_clear (CamelHeaderRaw *l);
#endif
175
/* State names for the d() debug traces in camel_mime_parser_step().
 * While d(x) expands to nothing the condition reads "#if 0" and the table
 * is compiled out; if d(x) is changed to expand to x it becomes "#if !0". */
#if d(!)0
static gchar *states[] = {
	"CAMEL_MIME_PARSER_STATE_INITIAL",
	"CAMEL_MIME_PARSER_STATE_PRE_FROM",	/* pre-from data */
	"CAMEL_MIME_PARSER_STATE_FROM",		/* got 'From' line */
	"CAMEL_MIME_PARSER_STATE_HEADER",	/* toplevel header */
	"CAMEL_MIME_PARSER_STATE_BODY",		/* scanning body of message */
	"CAMEL_MIME_PARSER_STATE_MULTIPART",	/* got multipart header */
	"CAMEL_MIME_PARSER_STATE_MESSAGE",	/* rfc822/news message */

	"CAMEL_MIME_PARSER_STATE_PART",		/* part of a multipart */

	"CAMEL_MIME_PARSER_STATE_EOF",		/* end of file */
	"CAMEL_MIME_PARSER_STATE_PRE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_HEAER_END",	/* NOTE(review): name is misspelled ("HEAER") */
	"CAMEL_MIME_PARSER_STATE_BODY_END",
	"CAMEL_MIME_PARSER_STATE_MULTIPART_END",
	"CAMEL_MIME_PARSER_STATE_MESSAGE_END",
};
#endif
197
/* Registers CamelMimeParser as a GObject type derived from G_TYPE_OBJECT;
 * the class_init/init functions and camel_mime_parser_parent_class used
 * below belong to this definition. */
G_DEFINE_TYPE (CamelMimeParser, camel_mime_parser, G_TYPE_OBJECT)
199
200 static void
201 mime_parser_finalize (GObject *object)
202 {
203 struct _header_scan_state *s = _PRIVATE (object);
204
205 #ifdef PURIFY
206 purify_watch_remove_all ();
207 #endif
208
209 folder_scan_close (s);
210
211 /* Chain up to parent's finalize() method. */
212 G_OBJECT_CLASS (camel_mime_parser_parent_class)->finalize (object);
213 }
214
215 static void
camel_mime_parser_class_init(CamelMimeParserClass * class)216 camel_mime_parser_class_init (CamelMimeParserClass *class)
217 {
218 GObjectClass *object_class;
219
220 object_class = G_OBJECT_CLASS (class);
221 object_class->finalize = mime_parser_finalize;
222 }
223
224 static void
camel_mime_parser_init(CamelMimeParser * parser)225 camel_mime_parser_init (CamelMimeParser *parser)
226 {
227 parser->priv = folder_scan_init ();
228 }
229
230 /**
231 * camel_mime_parser_new:
232 *
233 * Create a new CamelMimeParser object.
234 *
235 * Returns: (transfer full): A new #CamelMimeParser object
236 **/
237 CamelMimeParser *
camel_mime_parser_new(void)238 camel_mime_parser_new (void)
239 {
240 return g_object_new (CAMEL_TYPE_MIME_PARSER, NULL);
241 }
242
243 /**
244 * camel_mime_parser_filter_add:
245 * @m: a #CamelMimeParser
246 * @mf: a #CamelMimeFilter
247 *
248 * Add a filter that will be applied to any body content before it is passed
249 * to the caller. Filters may be pipelined to perform multi-pass operations
250 * on the content, and are applied in the order they were added.
251 *
252 * Note that filters are only applied to the body content of messages, and once
253 * a filter has been set, all content returned by a camel_mime_parser_step()
254 * with a state of CAMEL_MIME_PARSER_STATE_BODY will have passed through the
255 * filter.
256 *
257 * Returns: An id that may be passed to camel_mime_parser_filter_remove() to
258 * remove the filter, or -1 if the operation failed.
259 *
260 * Since: 2.22
261 **/
262 gint
camel_mime_parser_filter_add(CamelMimeParser * m,CamelMimeFilter * mf)263 camel_mime_parser_filter_add (CamelMimeParser *m,
264 CamelMimeFilter *mf)
265 {
266 struct _header_scan_state *s = _PRIVATE (m);
267 struct _header_scan_filter *f, *new;
268
269 new = g_malloc (sizeof (*new));
270 new->filter = mf;
271 new->id = s->filterid++;
272 if (s->filterid == -1)
273 s->filterid++;
274 new->next = NULL;
275 g_object_ref (G_OBJECT (mf));
276
277 /* yes, this is correct, since 'next' is the first element of the struct */
278 f = (struct _header_scan_filter *) &s->filters;
279 while (f->next)
280 f = f->next;
281 f->next = new;
282 return new->id;
283 }
284
285 /**
286 * camel_mime_parser_filter_remove:
287 * @m: a #CamelMimeParser
288 * @id: id of the filter to remove, as returned from camel_mime_parser_filter_add()
289 *
290 * Remove a processing filter from the pipeline. There is no
291 * restriction on the order the filters can be removed.
292 *
293 * Since: 2.22
294 **/
295 void
camel_mime_parser_filter_remove(CamelMimeParser * m,gint id)296 camel_mime_parser_filter_remove (CamelMimeParser *m,
297 gint id)
298 {
299 struct _header_scan_state *s = _PRIVATE (m);
300 struct _header_scan_filter *f, *old;
301
302 f = (struct _header_scan_filter *) &s->filters;
303 while (f && f->next) {
304 old = f->next;
305 if (old->id == id) {
306 g_object_unref (old->filter);
307 f->next = old->next;
308 g_free (old);
309 /* there should only be a single matching id, but
310 * scan the whole lot anyway */
311 }
312 f = f->next;
313 }
314 }
315
316 /**
317 * camel_mime_parser_header:
318 * @m: a #CamelMimeParser
319 * @name: Name of header.
320 * @offset: Pointer that can receive the offset of the header in
321 * the stream from the start of parsing.
322 *
323 * Lookup a header by name.
324 *
325 * Returns: The header value, or NULL if the header is not
326 * defined.
327 **/
328 const gchar *
camel_mime_parser_header(CamelMimeParser * m,const gchar * name,gint * offset)329 camel_mime_parser_header (CamelMimeParser *m,
330 const gchar *name,
331 gint *offset)
332 {
333 struct _header_scan_state *s = _PRIVATE (m);
334
335 if (s->parts && s->parts->headers)
336 return header_raw_find (&s->parts->headers, name, offset);
337
338 return NULL;
339 }
340
341 /**
342 * camel_mime_parser_dup_headers:
343 * @m: a #CamelMimeParser
344 *
345 * Get the list of the raw headers which are defined for the
346 * current state of the parser. These headers are valid
347 * until the next call to camel_mime_parser_step(), or camel_mime_parser_drop_step().
348 *
349 * Returns: (transfer full): The headers, or %NULL, if there are no headers
350 * defined for the current part or state. Free it with camel_name_value_array_free().
351 *
352 * Since: 3.24
353 **/
354 CamelNameValueArray *
camel_mime_parser_dup_headers(CamelMimeParser * m)355 camel_mime_parser_dup_headers (CamelMimeParser *m)
356 {
357 struct _header_scan_state *s = _PRIVATE (m);
358
359 if (s->parts) {
360 CamelHeaderRaw *header = s->parts->headers;
361 CamelNameValueArray *header_copy = camel_name_value_array_new ();
362 while (header) {
363 camel_name_value_array_append (header_copy, header->name, header->value);
364 header = header->next;
365 }
366
367 return header_copy;
368 }
369 return NULL;
370 }
371
372 static const gchar *
byte_array_to_string(GByteArray * array)373 byte_array_to_string (GByteArray *array)
374 {
375 if (array == NULL)
376 return NULL;
377
378 if (array->len == 0 || array->data[array->len - 1] != '\0')
379 g_byte_array_append (array, (guint8 *) "", 1);
380
381 return (const gchar *) array->data;
382 }
383
384 /**
385 * camel_mime_parser_preface:
386 * @m: a #CamelMimeParser
387 *
388 * Retrieve the preface text for the current multipart.
389 * Can only be used when the state is CAMEL_MIME_PARSER_STATE_MULTIPART_END.
390 *
391 * Returns: The preface text, or NULL if there wasn't any.
392 *
393 * Since: 2.22
394 **/
395 const gchar *
camel_mime_parser_preface(CamelMimeParser * m)396 camel_mime_parser_preface (CamelMimeParser *m)
397 {
398 struct _header_scan_state *s = _PRIVATE (m);
399
400 if (s->parts)
401 return byte_array_to_string (s->parts->pretext);
402
403 return NULL;
404 }
405
406 /**
407 * camel_mime_parser_postface:
408 * @m: a #CamelMimeParser
409 *
410 * Retrieve the postface text for the current multipart.
411 * Only returns valid data when the current state if
412 * CAMEL_MIME_PARSER_STATE_MULTIPART_END.
413 *
414 * Returns: The postface text, or NULL if there wasn't any.
415 *
416 * Since: 2.22
417 **/
418 const gchar *
camel_mime_parser_postface(CamelMimeParser * m)419 camel_mime_parser_postface (CamelMimeParser *m)
420 {
421 struct _header_scan_state *s = _PRIVATE (m);
422
423 if (s->parts)
424 return byte_array_to_string (s->parts->posttext);
425
426 return NULL;
427 }
428
429 /**
430 * camel_mime_parser_from_line:
431 * @m: a #CamelMimeParser
432 *
433 * Get the last scanned "From " line, from a recently scanned from.
434 * This should only be called in the CAMEL_MIME_PARSER_STATE_FROM state. The
435 * from line will include the closing \n found (if there was one).
436 *
437 * The return value will remain valid while in the CAMEL_MIME_PARSER_STATE_FROM
438 * state, or any deeper state.
439 *
440 * Returns: The From line, or NULL if called out of context.
441 *
442 * Since: 2.22
443 **/
444 const gchar *
camel_mime_parser_from_line(CamelMimeParser * m)445 camel_mime_parser_from_line (CamelMimeParser *m)
446 {
447 struct _header_scan_state *s = _PRIVATE (m);
448
449 if (s->parts)
450 return byte_array_to_string (s->parts->from_line);
451
452 return NULL;
453 }
454
455 /**
456 * camel_mime_parser_init_with_fd:
457 * @m: a #CamelMimeParser
458 * @fd: A valid file descriptor.
459 *
460 * Initialise the scanner with an fd. The scanner's offsets
461 * will be relative to the current file position of the file
462 * descriptor. As a result, seekable descritors should
463 * be seeked using the parser seek functions.
464 *
465 * Returns: Returns -1 on error.
466 **/
467 gint
camel_mime_parser_init_with_fd(CamelMimeParser * m,gint fd)468 camel_mime_parser_init_with_fd (CamelMimeParser *m,
469 gint fd)
470 {
471 struct _header_scan_state *s = _PRIVATE (m);
472
473 return folder_scan_init_with_fd (s, fd);
474 }
475
476 /**
477 * camel_mime_parser_init_with_stream:
478 * @m: a #CamelMimeParser
479 * @stream: a #CamelStream to init with
480 * @error: return location for a #GError, or %NULL
481 *
482 * Initialise the scanner with a source stream. The scanner's
483 * offsets will be relative to the current file position of
484 * the stream. As a result, seekable streams should only
485 * be seeked using the parser seek function.
486 *
487 * Returns: -1 on error.
488 **/
489 gint
camel_mime_parser_init_with_stream(CamelMimeParser * parser,CamelStream * stream,GError ** error)490 camel_mime_parser_init_with_stream (CamelMimeParser *parser,
491 CamelStream *stream,
492 GError **error)
493 {
494 struct _header_scan_state *s = _PRIVATE (parser);
495
496 return folder_scan_init_with_stream (s, stream, error);
497 }
498
499 /**
500 * camel_mime_parser_init_with_input_stream:
501 * @parser: a #CamelMimeParser
502 * @input_stream: a #GInputStream
503 *
504 * Initialize the scanner with @input_stream. The scanner's offsets will
505 * be relative to the current file position of the stream. As a result,
506 * seekable streams should only be seeked using the parser seek function.
507 *
508 * Since: 3.12
509 **/
510 void
camel_mime_parser_init_with_input_stream(CamelMimeParser * parser,GInputStream * input_stream)511 camel_mime_parser_init_with_input_stream (CamelMimeParser *parser,
512 GInputStream *input_stream)
513 {
514 struct _header_scan_state *s = _PRIVATE (parser);
515
516 folder_scan_reset (s);
517 s->input_stream = g_object_ref (input_stream);
518 }
519
520 /**
521 * camel_mime_parser_init_with_bytes:
522 * @parser: a #CamelMimeParser
523 * @bytes: a #GBytes containing the message content
524 *
525 * Convenience function creates a #GMemoryInputStream from @bytes and hands
526 * it off to camel_mime_parser_init_with_input_stream().
527 *
528 * Since: 3.12
529 **/
530 void
camel_mime_parser_init_with_bytes(CamelMimeParser * parser,GBytes * bytes)531 camel_mime_parser_init_with_bytes (CamelMimeParser *parser,
532 GBytes *bytes)
533 {
534 GInputStream *input_stream;
535
536 g_return_if_fail (CAMEL_IS_MIME_PARSER (parser));
537 g_return_if_fail (bytes != NULL);
538
539 input_stream = g_memory_input_stream_new_from_bytes (bytes);
540 camel_mime_parser_init_with_input_stream (parser, input_stream);
541 g_object_unref (input_stream);
542 }
543
544 /**
545 * camel_mime_parser_scan_from:
546 * @parser: MIME parser object
547 * @scan_from: %TRUE if the scanner should scan From lines.
548 *
549 * Tell the scanner if it should scan "^From " lines or not.
550 *
551 * If the scanner is scanning from lines, two additional
552 * states CAMEL_MIME_PARSER_STATE_FROM and CAMEL_MIME_PARSER_STATE_FROM_END will be returned
553 * to the caller during parsing.
554 *
555 * This may also be preceeded by an optional
556 * CAMEL_MIME_PARSER_STATE_PRE_FROM state which contains the scanned data
557 * found before the From line is encountered. See also
558 * camel_mime_parser_scan_pre_from().
559 **/
560 void
camel_mime_parser_scan_from(CamelMimeParser * parser,gboolean scan_from)561 camel_mime_parser_scan_from (CamelMimeParser *parser,
562 gboolean scan_from)
563 {
564 struct _header_scan_state *s = _PRIVATE (parser);
565
566 s->scan_from = scan_from;
567 }
568
569 /**
570 * camel_mime_parser_scan_pre_from:
571 * @parser: MIME parser object
572 * @scan_pre_from: %TRUE if we want to get pre-from data.
573 *
574 * Tell the scanner whether we want to know abou the pre-from
575 * data during a scan. If we do, then we may get an additional
576 * state CAMEL_MIME_PARSER_STATE_PRE_FROM which returns the specified data.
577 **/
578 void
camel_mime_parser_scan_pre_from(CamelMimeParser * parser,gboolean scan_pre_from)579 camel_mime_parser_scan_pre_from (CamelMimeParser *parser,
580 gboolean scan_pre_from)
581 {
582 struct _header_scan_state *s = _PRIVATE (parser);
583
584 s->scan_pre_from = scan_pre_from;
585 }
586
587 /**
588 * camel_mime_parser_content_type:
589 * @parser: MIME parser object
590 *
591 * Get the content type defined in the current part.
592 *
593 * Returns: A content_type structure, or NULL if there
594 * is no content-type defined for this part of state of the
595 * parser.
596 **/
597 CamelContentType *
camel_mime_parser_content_type(CamelMimeParser * parser)598 camel_mime_parser_content_type (CamelMimeParser *parser)
599 {
600 struct _header_scan_state *s = _PRIVATE (parser);
601
602 /* FIXME: should this search up until it's found the 'right'
603 * content-type? can it? */
604 if (s->parts)
605 return s->parts->content_type;
606
607 return NULL;
608 }
609
610 /**
611 * camel_mime_parser_unstep:
612 * @parser: MIME parser object
613 *
614 * Cause the last step operation to repeat itself. If this is
615 * called repeated times, then the same step will be repeated
616 * that many times.
617 *
618 * Note that it is not possible to scan back using this function,
619 * only to have a way of peeking the next state.
620 **/
621 void
camel_mime_parser_unstep(CamelMimeParser * parser)622 camel_mime_parser_unstep (CamelMimeParser *parser)
623 {
624 struct _header_scan_state *s = _PRIVATE (parser);
625
626 s->unstep++;
627 }
628
629 /**
630 * camel_mime_parser_drop_step:
631 * @parser: MIME parser object
632 *
633 * Drop the last step call. This should only be used
634 * in conjunction with seeking of the stream as the
635 * stream may be in an undefined state relative to the
636 * state of the parser.
637 *
638 * Use this call with care.
639 **/
640 void
camel_mime_parser_drop_step(CamelMimeParser * parser)641 camel_mime_parser_drop_step (CamelMimeParser *parser)
642 {
643 struct _header_scan_state *s = _PRIVATE (parser);
644
645 s->unstep = 0;
646 folder_scan_drop_step (s);
647 }
648
649 /**
650 * camel_mime_parser_step:
651 * @parser: MIME parser object
652 * @databuffer: (inout) (array length=datalength) (optional) (element-type guint8): Pointer to
653 * accept a pointer to the data associated with this step (if any). May be %NULL,
654 * in which case datalength is also ingored.
655 * @datalength: (inout) (optional): Pointer to accept a pointer to the data
656 * length associated with this step (if any).
657 *
658 * Parse the next part of the MIME message. If camel_mime_parser_unstep()
659 * has been called, then continue to return the same state
660 * for that many calls.
661 *
662 * If the step is CAMEL_MIME_PARSER_STATE_BODY then the databuffer and datalength
663 * pointers will be setup to point to the internal data buffer
664 * of the scanner and may be processed as required. Any
665 * filters will have already been applied to this data.
666 *
667 * Refer to the state diagram elsewhere for a full listing of
668 * the states an application is gauranteed to get from the
669 * scanner.
670 *
671 * Returns: The current new state of the parser
672 * is returned.
673 **/
674 CamelMimeParserState
camel_mime_parser_step(CamelMimeParser * parser,gchar ** databuffer,gsize * datalength)675 camel_mime_parser_step (CamelMimeParser *parser,
676 gchar **databuffer,
677 gsize *datalength)
678 {
679 struct _header_scan_state *s = _PRIVATE (parser);
680
681 d (printf ("OLD STATE: '%s' :\n", states[s->state]));
682
683 if (s->unstep <= 0) {
684 gchar *dummy;
685 gsize dummylength;
686
687 if (databuffer == NULL) {
688 databuffer = &dummy;
689 datalength = &dummylength;
690 }
691
692 folder_scan_step (s, databuffer, datalength);
693 } else
694 s->unstep--;
695
696 d (printf ("NEW STATE: '%s' :\n", states[s->state]));
697
698 return s->state;
699 }
700
701 /**
702 * camel_mime_parser_read:
703 * @parser: MIME parser object
704 * @databuffer: (out) (array) (element-type guint8): The data buffer
705 * @len: The length of data to read
706 * @error: return location for a #GError, or %NULL
707 *
708 * Read at most @len bytes from the internal mime parser buffer.
709 *
710 * Returns the address of the internal buffer in @databuffer,
711 * and the length of useful data.
712 *
713 * @len may be specified as %G_MAXSSIZE, in which case you will
714 * get the full remainder of the buffer at each call.
715 *
716 * Note that no parsing of the data read through this function
717 * occurs, so no state changes occur, but the seek position
718 * is updated appropriately.
719 *
720 * Returns: The number of bytes available, or -1 on error.
721 **/
722 gssize
camel_mime_parser_read(CamelMimeParser * parser,const gchar ** databuffer,gssize len,GError ** error)723 camel_mime_parser_read (CamelMimeParser *parser,
724 const gchar **databuffer,
725 gssize len,
726 GError **error)
727 {
728 struct _header_scan_state *s = _PRIVATE (parser);
729 gintptr there;
730
731 if (len == 0)
732 return 0;
733
734 d (printf ("parser::read() reading %d bytes\n", len));
735
736 there = MIN (s->inend - s->inptr, len);
737 d (printf ("parser::read() there = %d bytes\n", there));
738 if (there > 0) {
739 *databuffer = s->inptr;
740 s->inptr += there;
741 return there;
742 }
743
744 if (folder_read (s) == -1) {
745 gint err = camel_mime_parser_errno (parser);
746
747 g_set_error (
748 error, G_IO_ERROR,
749 g_io_error_from_errno (err),
750 "%s", g_strerror (err));
751 return -1;
752 }
753
754 there = MIN (s->inend - s->inptr, len);
755 d (printf ("parser::read() had to re-read, now there = %d bytes\n", there));
756
757 *databuffer = s->inptr;
758 s->inptr += there;
759
760 return there;
761 }
762
763 /**
764 * camel_mime_parser_tell:
765 * @parser: MIME parser object
766 *
767 * Return the current scanning offset. The meaning of this
768 * value will depend on the current state of the parser.
769 *
770 * An incomplete listing of the states:
771 *
772 * CAMEL_MIME_PARSER_STATE_INITIAL, The start of the current message.
773 * CAMEL_MIME_PARSER_STATE_HEADER, CAMEL_MIME_PARSER_STATE_MESSAGE, CAMEL_MIME_PARSER_STATE_MULTIPART, the character
774 * position immediately after the end of the header.
775 * CAMEL_MIME_PARSER_STATE_BODY, Position within the message of the start
776 * of the current data block.
777 * CAMEL_MIME_PARSER_STATE_*_END, The position of the character starting
778 * the next section of the scan (the last position + 1 of
779 * the respective current state).
780 *
781 * Returns: See above.
782 *
783 * Since: 2.22
784 **/
785 goffset
camel_mime_parser_tell(CamelMimeParser * parser)786 camel_mime_parser_tell (CamelMimeParser *parser)
787 {
788 struct _header_scan_state *s = _PRIVATE (parser);
789
790 return folder_tell (s);
791 }
792
793 /**
794 * camel_mime_parser_tell_start_headers:
795 * @parser: MIME parser object
796 *
797 * Find out the position within the file of where the
798 * headers started, this is cached by the parser
799 * at the time.
800 *
801 * Returns: The header start position, or -1 if
802 * no headers were scanned in the current state.
803 *
804 * Since: 2.22
805 **/
806 goffset
camel_mime_parser_tell_start_headers(CamelMimeParser * parser)807 camel_mime_parser_tell_start_headers (CamelMimeParser *parser)
808 {
809 struct _header_scan_state *s = _PRIVATE (parser);
810
811 return s->start_of_headers;
812 }
813
814 /**
815 * camel_mime_parser_tell_start_from:
816 * @parser: MIME parser object
817 *
818 * If the parser is scanning From lines, then this returns
819 * the position of the start of the From line.
820 *
821 * Returns: The start of the from line, or -1 if there
822 * was no From line, or From lines are not being scanned.
823 *
824 * Since: 2.22
825 **/
826 goffset
camel_mime_parser_tell_start_from(CamelMimeParser * parser)827 camel_mime_parser_tell_start_from (CamelMimeParser *parser)
828 {
829 struct _header_scan_state *s = _PRIVATE (parser);
830
831 return s->start_of_from;
832 }
833
834 /**
835 * camel_mime_parser_tell_start_boundary:
836 * @parser: MIME parser object
837 *
838 * When parsing a multipart, this returns the start of the last
839 * boundary.
840 *
841 * Returns: The start of the boundary, or -1 if there
842 * was no boundary encountered yet.
843 *
844 * Since: 2.22
845 **/
846 goffset
camel_mime_parser_tell_start_boundary(CamelMimeParser * parser)847 camel_mime_parser_tell_start_boundary (CamelMimeParser *parser)
848 {
849 struct _header_scan_state *s = _PRIVATE (parser);
850
851 return s->start_of_boundary;
852 }
853
854 /**
855 * camel_mime_parser_seek:
856 * @parser: MIME parser object
857 * @offset: Number of bytes to offset the seek by.
858 * @whence: SEEK_SET, SEEK_CUR, SEEK_END
859 *
860 * Reset the source position to a known value.
861 *
862 * Note that if the source stream/descriptor was not
863 * positioned at 0 to begin with, and an absolute seek
864 * is specified (whence != SEEK_CUR), then the seek
865 * position may not match the desired seek position.
866 *
867 * Returns: The new seek offset, or -1 on
868 * an error (for example, trying to seek on a non-seekable
869 * stream or file descriptor).
870 *
871 * Since: 2.22
872 **/
873 goffset
camel_mime_parser_seek(CamelMimeParser * parser,goffset offset,gint whence)874 camel_mime_parser_seek (CamelMimeParser *parser,
875 goffset offset,
876 gint whence)
877 {
878 struct _header_scan_state *s = _PRIVATE (parser);
879
880 return folder_seek (s, offset, whence);
881 }
882
883 /**
884 * camel_mime_parser_state:
885 * @parser: MIME parser object
886 *
887 * Get the current parser state.
888 *
889 * Returns: The current parser state.
890 **/
891 CamelMimeParserState
camel_mime_parser_state(CamelMimeParser * parser)892 camel_mime_parser_state (CamelMimeParser *parser)
893 {
894 struct _header_scan_state *s = _PRIVATE (parser);
895
896 return s->state;
897 }
898
899 /**
900 * camel_mime_parser_push_state:
901 * @mp: MIME parser object
902 * @newstate: New state
903 * @boundary: Boundary marker for state.
904 *
905 * Pre-load a new parser state. Used to post-parse multipart content
906 * without headers.
907 **/
908 void
camel_mime_parser_push_state(CamelMimeParser * mp,CamelMimeParserState newstate,const gchar * boundary)909 camel_mime_parser_push_state (CamelMimeParser *mp,
910 CamelMimeParserState newstate,
911 const gchar *boundary)
912 {
913 struct _header_scan_stack *h;
914 struct _header_scan_state *s = _PRIVATE (mp);
915 gsize boundary_len;
916
917 h = g_malloc0 (sizeof (*h));
918 h->boundarylen = strlen (boundary) + 2;
919 h->boundarylenfinal = h->boundarylen + 2;
920 boundary_len = h->boundarylen + 3;
921 h->boundary = g_malloc (boundary_len);
922 g_snprintf (h->boundary, boundary_len, "--%s--", boundary);
923 folder_push_part (s, h);
924 s->state = newstate;
925 }
926
927 /**
928 * camel_mime_parser_stream:
929 * @parser: MIME parser object
930 *
931 * Get the stream, if any, the parser has been initialised
932 * with. May be used to setup sub-streams, but should not
933 * be read from directly (without saving and restoring
934 * the seek position in between).
935 *
936 * Returns: (transfer none) (nullable): The stream from camel_mime_parser_init_with_stream(),
937 * or NULL if the parser is reading from a file descriptor or is
938 * uninitialised.
939 **/
940 CamelStream *
camel_mime_parser_stream(CamelMimeParser * parser)941 camel_mime_parser_stream (CamelMimeParser *parser)
942 {
943 struct _header_scan_state *s = _PRIVATE (parser);
944
945 return s->stream;
946 }
947
948 /* Return errno of the parser, incase any error occurred during processing */
949 gint
camel_mime_parser_errno(CamelMimeParser * parser)950 camel_mime_parser_errno (CamelMimeParser *parser)
951 {
952 struct _header_scan_state *s = _PRIVATE (parser);
953
954 return s->ioerrno;
955 }
956
957 /* ********************************************************************** */
958 /* Implementation */
959 /* ********************************************************************** */
960
961 /* read the next bit of data, ensure there is enough room 'atleast' bytes */
962 static gint
folder_read(struct _header_scan_state * s)963 folder_read (struct _header_scan_state *s)
964 {
965 gint len;
966 gint inoffset;
967
968 if (s->inptr < s->inend - s->atleast || s->eof)
969 return s->inend - s->inptr;
970 #ifdef PURIFY
971 purify_watch_remove (inend_id);
972 purify_watch_remove (inbuffer_id);
973 #endif
974 /* check for any remaning bytes (under the atleast limit( */
975 inoffset = s->inend - s->inptr;
976 if (inoffset > 0) {
977 memmove (s->inbuf, s->inptr, inoffset);
978 }
979 if (s->stream) {
980 len = camel_stream_read (
981 s->stream, s->inbuf + inoffset, SCAN_BUF - inoffset, NULL, NULL);
982 } else if (s->input_stream != NULL) {
983 len = g_input_stream_read (
984 s->input_stream, s->inbuf + inoffset,
985 SCAN_BUF - inoffset, NULL, NULL);
986 } else {
987 len = read (s->fd, s->inbuf + inoffset, SCAN_BUF - inoffset);
988 }
989 r (printf ("read %d bytes, offset = %d\n", len, inoffset));
990 if (len >= 0) {
991 /* add on the last read block */
992 s->seek += s->inptr - s->inbuf;
993 s->inptr = s->inbuf;
994 s->inend = s->inbuf + len + inoffset;
995 s->eof = (len == 0);
996 r (printf ("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr));
997 } else {
998 s->ioerrno = errno ? errno : EIO;
999 }
1000
1001 g_return_val_if_fail (s->inptr <= s->inend, 0);
1002 #ifdef PURIFY
1003 inend_id = purify_watch (&s->inend);
1004 inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
1005 #endif
1006 r (printf ("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr));
1007 /* set a sentinal, for the inner loops to check against */
1008 s->inend[0] = '\n';
1009 return s->inend - s->inptr;
1010 }
1011
1012 /* return the current absolute position of the data pointer */
1013 static goffset
folder_tell(struct _header_scan_state * s)1014 folder_tell (struct _header_scan_state *s)
1015 {
1016 return s->seek + (s->inptr - s->inbuf);
1017 }
1018
1019 /*
1020 * need some way to prime the parser state, so this actually works for
1021 * other than top-level messages
1022 */
1023 static goffset
folder_seek(struct _header_scan_state * s,goffset offset,gint whence)1024 folder_seek (struct _header_scan_state *s,
1025 goffset offset,
1026 gint whence)
1027 {
1028 goffset newoffset;
1029
1030 if (s->stream) {
1031 if (G_IS_SEEKABLE (s->stream)) {
1032 /* NOTE: assumes whence seekable stream == whence libc, which is probably
1033 * the case (or bloody well should've been) */
1034 g_seekable_seek (
1035 G_SEEKABLE (s->stream),
1036 offset, whence, NULL, NULL);
1037 newoffset = g_seekable_tell (G_SEEKABLE (s->stream));
1038 } else {
1039 newoffset = -1;
1040 errno = EINVAL;
1041 }
1042 } else if (s->input_stream != NULL) {
1043 if (G_IS_SEEKABLE (s->input_stream)) {
1044 /* NOTE: assumes whence seekable stream == whence libc, which is probably
1045 * the case (or bloody well should've been) */
1046 g_seekable_seek (
1047 G_SEEKABLE (s->input_stream),
1048 offset, whence, NULL, NULL);
1049 newoffset = g_seekable_tell (G_SEEKABLE (s->input_stream));
1050 } else {
1051 newoffset = -1;
1052 errno = EINVAL;
1053 }
1054 } else {
1055 newoffset = lseek (s->fd, offset, whence);
1056 }
1057 #ifdef PURIFY
1058 purify_watch_remove (inend_id);
1059 purify_watch_remove (inbuffer_id);
1060 #endif
1061 if (newoffset != -1) {
1062 s->seek = newoffset;
1063 s->inptr = s->inbuf;
1064 s->inend = s->inbuf;
1065 s->eof = FALSE;
1066 } else {
1067 s->ioerrno = errno ? errno : EIO;
1068 }
1069 #ifdef PURIFY
1070 inend_id = purify_watch (&s->inend);
1071 inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
1072 #endif
1073 return newoffset;
1074 }
1075
1076 static void
folder_push_part(struct _header_scan_state * s,struct _header_scan_stack * h)1077 folder_push_part (struct _header_scan_state *s,
1078 struct _header_scan_stack *h)
1079 {
1080 if (s->parts && s->parts->atleast > h->boundarylenfinal)
1081 h->atleast = s->parts->atleast;
1082 else
1083 h->atleast = MAX (h->boundarylenfinal, 1);
1084
1085 h->parent = s->parts;
1086 s->parts = h;
1087 }
1088
1089 static void
folder_scan_stack_free(struct _header_scan_stack * h)1090 folder_scan_stack_free (struct _header_scan_stack *h)
1091 {
1092 if (h) {
1093 g_free (h->boundary);
1094 #ifdef MEMPOOL
1095 camel_mempool_destroy (h->pool);
1096 #else
1097 camel_header_raw_clear (&h->headers);
1098 #endif
1099 camel_content_type_unref (h->content_type);
1100 if (h->pretext)
1101 g_byte_array_free (h->pretext, TRUE);
1102 if (h->posttext)
1103 g_byte_array_free (h->posttext, TRUE);
1104 if (h->from_line)
1105 g_byte_array_free (h->from_line, TRUE);
1106 g_free (h);
1107 }
1108 }
1109
1110 static void
folder_pull_part(struct _header_scan_state * s)1111 folder_pull_part (struct _header_scan_state *s)
1112 {
1113 struct _header_scan_stack *h;
1114
1115 h = s->parts;
1116 if (h) {
1117 s->parts = h->parent;
1118
1119 folder_scan_stack_free (h);
1120 } else {
1121 g_warning ("Header stack underflow!\n");
1122 }
1123 }
1124
1125 static gint
folder_scan_skip_line(struct _header_scan_state * s,GByteArray * save)1126 folder_scan_skip_line (struct _header_scan_state *s,
1127 GByteArray *save)
1128 {
1129 gint atleast = s->atleast;
1130 register gchar *inptr, *inend, c;
1131 gint len;
1132
1133 s->atleast = 1;
1134
1135 d (printf ("skipping line\n"));
1136
1137 while ( (len = folder_read (s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
1138 inptr = s->inptr;
1139 inend = s->inend;
1140
1141 c = -1;
1142 while (inptr < inend
1143 && (c = *inptr++) != '\n') {
1144 d (printf ("(%2x,%c)", c, isprint (c) ? c : '.'));
1145 ;
1146 }
1147
1148 if (save)
1149 g_byte_array_append (save, (guint8 *) s->inptr, inptr - s->inptr);
1150
1151 s->inptr = inptr;
1152
1153 if (c == '\n') {
1154 s->atleast = atleast;
1155 return 0;
1156 }
1157 }
1158
1159 d (printf ("couldn't find end of line?\n"));
1160
1161 s->atleast = atleast;
1162
1163 return -1; /* not found */
1164 }
1165
1166 /* TODO: Is there any way to make this run faster? It gets called a lot ... */
1167 static struct _header_scan_stack *
folder_boundary_check(struct _header_scan_state * s,const gchar * boundary,gint * lastone)1168 folder_boundary_check (struct _header_scan_state *s,
1169 const gchar *boundary,
1170 gint *lastone)
1171 {
1172 struct _header_scan_stack *part;
1173 gint len = s->inend - boundary; /* make sure we dont access past the buffer */
1174
1175 h (printf ("checking boundary marker upto %d bytes\n", len));
1176 part = s->parts;
1177 while (part) {
1178 h (printf (" boundary: %s\n", part->boundary));
1179 h (printf (" against: '%.*s'\n", part->boundarylen, boundary));
1180 if (part->boundary
1181 && part->boundarylen <= len
1182 && memcmp (boundary, part->boundary, part->boundarylen) == 0) {
1183 h (printf ("matched boundary: %s\n", part->boundary));
1184 /* again, make sure we're in range */
1185 if (part->boundarylenfinal <= len) {
1186 gint extra = part->boundarylenfinal - part->boundarylen;
1187
1188 /* check the extra stuff on a final boundary, normally -- for mime parts */
1189 if (extra > 0) {
1190 *lastone = memcmp(&boundary[part->boundarylen],
1191 &part->boundary[part->boundarylen],
1192 extra) == 0;
1193 } else {
1194 *lastone = TRUE;
1195 }
1196 h (printf ("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
1197 } else {
1198 h (printf ("not enough room to check last one?\n"));
1199 *lastone = FALSE;
1200 }
1201 /*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/
1202 return part;
1203 }
1204 part = part->parent;
1205 }
1206 return NULL;
1207 }
1208
#ifdef MEMPOOL
/* Split the NUL-terminated text at @header on its first ':' and append
 * the name/value pair to the end of h->headers.  The value runs from just
 * after the colon up to s->outptr.  All storage comes from h->pool.
 * Text without a colon is silently ignored. */
static void
header_append_mempool (struct _header_scan_state *s,
                       struct _header_scan_stack *h,
                       gchar *header,
                       gint offset)
{
	CamelHeaderRaw *node, **link;
	gchar *colon, *value;
	gint name_len, value_len;

	colon = strchr (header, ':');
	if (colon == NULL)
		return;

	node = camel_mempool_alloc (h->pool, sizeof (*node));
	node->next = NULL;

	name_len = colon - header;
	node->name = camel_mempool_alloc (h->pool, name_len + 1);
	memcpy (node->name, header, name_len);
	node->name[name_len] = 0;

	value = colon + 1;
	value_len = s->outptr - value;
	node->value = camel_mempool_alloc (h->pool, value_len + 1);
	memcpy (node->value, value, value_len);
	node->value[value_len] = 0;

	node->offset = offset;

	/* walk to the terminating NULL link and hook the new node there */
	link = &h->headers;
	while (*link != NULL)
		link = &(*link)->next;
	*link = node;
}

#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))

#endif
1251
/* Copy the string start->inptr into the header buffer (s->outbuf),
 * grow if necessary
 * remove a trailing \r char (\n's assumed already removed)
 * and track the start offset of the header */
/* Basically an optimised version of g_byte_array_append() */
/* After the copy at least one spare byte remains at s->outptr, so callers
 * can NUL-terminate in place.  The arguments are evaluated more than once
 * and must be free of side effects.  The do/while (0) wrapper makes the
 * macro behave as a single statement, e.g. in an unbraced if/else. */
#define header_append(s, start, inptr) \
	do { \
		gintptr headerlen = (inptr) - (start); \
 \
		if (headerlen > 0) { \
			if (headerlen >= ((s)->outend - (s)->outptr)) { \
				gchar *outnew; \
				gintptr olen = (((s)->outend - (s)->outbuf) + headerlen) * 2 + 1; \
				outnew = g_realloc ((s)->outbuf, olen); \
				(s)->outptr = (s)->outptr - (s)->outbuf + outnew; \
				(s)->outbuf = outnew; \
				(s)->outend = outnew + olen; \
			} \
			if ((start)[headerlen - 1] == '\r') \
				headerlen--; \
			memcpy ((s)->outptr, (start), headerlen); \
			(s)->outptr += headerlen; \
		} \
		if ((s)->header_start == -1) \
			(s)->header_start = ((start) - (s)->inbuf) + (s)->seek; \
	} while (0)
1278
1279 static struct _header_scan_stack *
folder_scan_header(struct _header_scan_state * s,gint * lastone)1280 folder_scan_header (struct _header_scan_state *s,
1281 gint *lastone)
1282 {
1283 gint atleast = s->atleast, newatleast;
1284 gchar *start = NULL;
1285 gint len;
1286 struct _header_scan_stack *h;
1287 gchar *inend;
1288 register gchar *inptr;
1289
1290 h (printf ("scanning first bit\n"));
1291
1292 h = g_malloc0 (sizeof (*h));
1293 #ifdef MEMPOOL
1294 h->pool = camel_mempool_new (8192, 4096, CAMEL_MEMPOOL_ALIGN_STRUCT);
1295 #endif
1296
1297 if (s->parts)
1298 newatleast = s->parts->atleast;
1299 else
1300 newatleast = 1;
1301 *lastone = FALSE;
1302
1303 do {
1304 s->atleast = newatleast;
1305
1306 h (printf ("atleast = %d\n", s->atleast));
1307
1308 while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
1309 inptr = s->inptr;
1310 inend = s->inend - s->atleast + 1;
1311
1312 while (inptr < inend) {
1313 start = inptr;
1314 if (!s->midline) {
1315 if (folder_boundary_check (s, inptr, lastone)) {
1316 if ((s->outptr > s->outbuf))
1317 goto header_truncated; /* may not actually be truncated */
1318
1319 goto header_done;
1320 }
1321 } else if (s->check_header_folded) {
1322 if (*inptr != ' ' && *inptr != '\t') {
1323 s->outptr[0] = 0;
1324
1325 /* The outbuf can contain an extra \r\n, thus remove it */
1326 if (s->outptr > s->outbuf && (s->outptr[-1] == '\r' || s->outptr[-1] == '\n'))
1327 s->outptr[-1] = 0;
1328
1329 if (s->outptr - 1 > s->outbuf && (s->outptr[-2] == '\r' || s->outptr[-2] == '\n'))
1330 s->outptr[-2] = 0;
1331
1332 if (s->outbuf == s->outptr || !*s->outbuf) {
1333 s->check_header_folded = FALSE;
1334 s->midline = FALSE;
1335 goto header_done;
1336 }
1337
1338 h (printf ("header not folded '%s' at %d\n", s->outbuf, (gint) s->header_start));
1339
1340 header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1341 s->outptr = s->outbuf;
1342 s->header_start = -1;
1343 }
1344 }
1345
1346 /* goto next line/sentinal */
1347 while ((*inptr++) != '\n')
1348 ;
1349
1350 if (inptr > s->inend + 1) {
1351 g_warn_if_fail (inptr <= s->inend + 1);
1352 folder_scan_stack_free (h);
1353 return NULL;
1354 }
1355
1356 /* check for sentinal or real end of line */
1357 if (inptr >= inend) {
1358 h (printf ("not at end of line yet, going further\n"));
1359 /* didn't find end of line within our allowed area */
1360 s->midline = TRUE;
1361 s->check_header_folded = inptr == inend;
1362 inptr = inend;
1363 header_append (s, start, inptr + (s->check_header_folded ? -1 : 0));
1364 } else {
1365 h (printf ("got line part: '%.*s'\n", (gint) (inptr - 1 - start), start));
1366 /* got a line, strip and add it, process it */
1367 s->midline = FALSE;
1368 s->check_header_folded = FALSE;
1369 header_append (s, start, inptr - 1);
1370
1371 /* check for end of headers */
1372 if (s->outbuf == s->outptr)
1373 goto header_done;
1374
1375 /* check for continuation/compress headers, we have atleast 1 gchar here to work with */
1376 if (inptr[0] == ' ' || inptr[0] == '\t') {
1377 h (printf ("continuation\n"));
1378
1379 #ifdef PRESERVE_HEADERS
1380 if (inptr - 1 >= start) {
1381 start = inptr - 1;
1382 header_append (s, start, inptr);
1383 }
1384 #endif
1385 #ifndef PRESERVE_HEADERS
1386 /* TODO: this wont catch multiple space continuation across a read boundary, but
1387 * that is assumed rare, and not fatal anyway */
1388 do
1389 inptr++;
1390 while (*inptr == ' ' || *inptr == '\t');
1391 inptr--;
1392 *inptr = ' ';
1393 #endif
1394 } else {
1395 /* otherwise, complete header, add it */
1396 s->outptr[0] = 0;
1397
1398 h (printf ("header '%s' at %d\n", s->outbuf, (gint) s->header_start));
1399
1400 header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1401 s->outptr = s->outbuf;
1402 s->header_start = -1;
1403 }
1404 }
1405 }
1406 s->inptr = inptr;
1407 }
1408 h (printf ("end of file? read %d bytes\n", len));
1409 newatleast = 1;
1410 } while (s->atleast > 1);
1411
1412 if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
1413 start = s->inptr;
1414 inptr = s->inend;
1415 if (inptr > start) {
1416 if (inptr[-1] == '\n')
1417 inptr--;
1418 }
1419 goto header_truncated;
1420 }
1421
1422 s->atleast = atleast;
1423
1424 return h;
1425
1426 header_truncated:
1427 header_append (s, start, inptr);
1428
1429 s->outptr[0] = 0;
1430
1431 if (s->check_header_folded && s->midline) {
1432 /* The outbuf can contain an extra \r\n, thus remove it */
1433 if (s->outptr > s->outbuf && (s->outptr[-1] == '\r' || s->outptr[-1] == '\n'))
1434 s->outptr[-1] = 0;
1435
1436 if (s->outptr - 1 > s->outbuf && (s->outptr[-2] == '\r' || s->outptr[-2] == '\n'))
1437 s->outptr[-2] = 0;
1438 }
1439
1440 if (s->outbuf == s->outptr)
1441 goto header_done;
1442
1443 header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
1444
1445 s->outptr = s->outbuf;
1446 header_done:
1447 s->inptr = inptr;
1448 s->atleast = atleast;
1449 s->header_start = -1;
1450 return h;
1451 }
1452
1453 static struct _header_scan_stack *
folder_scan_content(struct _header_scan_state * s,gint * lastone,gchar ** data,gsize * length)1454 folder_scan_content (struct _header_scan_state *s,
1455 gint *lastone,
1456 gchar **data,
1457 gsize *length)
1458 {
1459 gint atleast = s->atleast, newatleast;
1460 register gchar *inptr;
1461 gchar *inend;
1462 gchar *start;
1463 gint len;
1464 struct _header_scan_stack *part;
1465 gint onboundary = FALSE;
1466
1467 c (printf ("scanning content\n"));
1468
1469 part = s->parts;
1470 if (part)
1471 newatleast = part->atleast;
1472 else
1473 newatleast = 1;
1474 *lastone = FALSE;
1475
1476 c (printf ("atleast = %d\n", newatleast));
1477
1478 do {
1479 s->atleast = newatleast;
1480
1481 while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
1482 inptr = s->inptr;
1483 if (s->eof)
1484 inend = s->inend;
1485 else
1486 inend = s->inend - s->atleast + 1;
1487 start = inptr;
1488
1489 c (printf ("inptr = %p, inend = %p\n", inptr, inend));
1490
1491 while (inptr < inend) {
1492 if (!s->midline
1493 && (part = folder_boundary_check (s, inptr, lastone))) {
1494 onboundary = TRUE;
1495
1496 /* since we truncate the boundary data, we need at least 1 gchar here spare,
1497 * to remain in the same state */
1498 if ( (inptr - start) > 1)
1499 goto content;
1500
1501 /* otherwise, jump to the state of the boundary we actually found */
1502 goto normal_exit;
1503 }
1504
1505 /* goto the next line */
1506 while ((*inptr++) != '\n')
1507 ;
1508
1509 /* check the sentinal, if we went past the atleast limit, and reset it to there */
1510 if (inptr > inend) {
1511 s->midline = TRUE;
1512 inptr = inend;
1513 } else {
1514 s->midline = FALSE;
1515 }
1516 }
1517
1518 goto content;
1519 }
1520 newatleast = 1;
1521 } while (s->atleast > 1);
1522
1523 c (printf ("length read = %d\n", len));
1524
1525 if (s->inend > s->inptr) {
1526 start = s->inptr;
1527 inptr = s->inend;
1528 goto content;
1529 }
1530
1531 *length = 0;
1532 *data = s->inptr;
1533 s->atleast = atleast;
1534 return NULL;
1535
1536 content:
1537 /* treat eof as the last boundary in From mode */
1538 if (s->scan_from && s->eof && s->atleast <= 1) {
1539 onboundary = TRUE;
1540 part = NULL;
1541 } else {
1542 part = s->parts;
1543 }
1544 normal_exit:
1545 s->atleast = atleast;
1546 s->inptr = inptr;
1547
1548 *data = start;
1549 /* if we hit a boundary, we should not include the closing \n */
1550 if (onboundary && (inptr - start) > 0)
1551 *length = inptr-start-1;
1552 else
1553 *length = inptr-start;
1554
1555 /*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/
1556
1557 return part;
1558 }
1559
1560 static void
folder_scan_close(struct _header_scan_state * s)1561 folder_scan_close (struct _header_scan_state *s)
1562 {
1563 g_free (s->realbuf);
1564 g_free (s->outbuf);
1565 while (s->parts)
1566 folder_pull_part (s);
1567 if (s->fd != -1)
1568 close (s->fd);
1569 g_clear_object (&s->stream);
1570 g_clear_object (&s->input_stream);
1571 g_free (s);
1572 }
1573
1574 static struct _header_scan_state *
folder_scan_init(void)1575 folder_scan_init (void)
1576 {
1577 struct _header_scan_state *s;
1578
1579 s = g_malloc (sizeof (*s));
1580
1581 s->fd = -1;
1582 s->stream = NULL;
1583 s->input_stream = NULL;
1584 s->ioerrno = 0;
1585
1586 s->outbuf = g_malloc (1024);
1587 s->outptr = s->outbuf;
1588 s->outend = s->outbuf + 1024;
1589
1590 s->realbuf = g_malloc0 (SCAN_BUF + SCAN_HEAD * 2);
1591 s->inbuf = s->realbuf + SCAN_HEAD;
1592 s->inptr = s->inbuf;
1593 s->inend = s->inbuf;
1594 s->atleast = 0;
1595
1596 s->seek = 0; /* current character position in file of the last read block */
1597 s->unstep = 0;
1598
1599 s->header_start = -1;
1600
1601 s->start_of_from = -1;
1602 s->start_of_headers = -1;
1603 s->start_of_boundary = -1;
1604
1605 s->midline = FALSE;
1606 s->check_header_folded = FALSE;
1607 s->scan_from = FALSE;
1608 s->scan_pre_from = FALSE;
1609 s->eof = FALSE;
1610
1611 s->filters = NULL;
1612 s->filterid = 1;
1613
1614 s->parts = NULL;
1615
1616 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1617 return s;
1618 }
1619
1620 static void
drop_states(struct _header_scan_state * s)1621 drop_states (struct _header_scan_state *s)
1622 {
1623 while (s->parts) {
1624 folder_scan_drop_step (s);
1625 }
1626 s->unstep = 0;
1627 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1628 }
1629
1630 static void
folder_scan_reset(struct _header_scan_state * s)1631 folder_scan_reset (struct _header_scan_state *s)
1632 {
1633 drop_states (s);
1634 s->inend = s->inbuf;
1635 s->inptr = s->inbuf;
1636 s->inend[0] = '\n';
1637 if (s->fd != -1) {
1638 close (s->fd);
1639 s->fd = -1;
1640 }
1641 g_clear_object (&s->stream);
1642 g_clear_object (&s->input_stream);
1643 s->ioerrno = 0;
1644 s->eof = FALSE;
1645 }
1646
1647 static gint
folder_scan_init_with_fd(struct _header_scan_state * s,gint fd)1648 folder_scan_init_with_fd (struct _header_scan_state *s,
1649 gint fd)
1650 {
1651 folder_scan_reset (s);
1652 s->fd = fd;
1653
1654 return 0;
1655 }
1656
1657 static gint
folder_scan_init_with_stream(struct _header_scan_state * s,CamelStream * stream,GError ** error)1658 folder_scan_init_with_stream (struct _header_scan_state *s,
1659 CamelStream *stream,
1660 GError **error)
1661 {
1662 folder_scan_reset (s);
1663 s->stream = g_object_ref (stream);
1664
1665 return 0;
1666 }
1667
1668 static gboolean
part_is_encoded(CamelHeaderRaw ** headers)1669 part_is_encoded (CamelHeaderRaw **headers)
1670 {
1671 const gchar *encoding;
1672
1673 encoding = header_raw_find (headers, "Content-Transfer-Encoding", NULL);
1674
1675 if (!encoding || !*encoding)
1676 return FALSE;
1677
1678 if (*encoding == ' ' || *encoding == '\t')
1679 encoding++;
1680
1681 switch (camel_transfer_encoding_from_string (encoding)) {
1682 case CAMEL_TRANSFER_ENCODING_BASE64:
1683 case CAMEL_TRANSFER_ENCODING_QUOTEDPRINTABLE:
1684 case CAMEL_TRANSFER_ENCODING_UUENCODE:
1685 return TRUE;
1686 default:
1687 break;
1688 }
1689
1690 return FALSE;
1691 }
1692
1693 #define USE_FROM
1694
1695 static void
folder_scan_step(struct _header_scan_state * s,gchar ** databuffer,gsize * datalength)1696 folder_scan_step (struct _header_scan_state *s,
1697 gchar **databuffer,
1698 gsize *datalength)
1699 {
1700 struct _header_scan_stack *h, *hb;
1701 const gchar *content;
1702 const gchar *bound;
1703 gint type, state, seenlast;
1704 CamelContentType *ct = NULL;
1705 struct _header_scan_filter *f;
1706 gsize presize;
1707 gulong boundary_len;
1708
1709 /* printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
1710
1711 tail_recurse:
1712 d ({
1713 printf ("\nSCAN STACK:\n");
1714 printf (" '%s' :\n", states[s->state]);
1715 hb = s->parts;
1716 while (hb) {
1717 printf (" '%s' : %s ", states[hb->savestate], hb->boundary);
1718 if (hb->content_type) {
1719 printf ("(%s/%s)", hb->content_type->type, hb->content_type->subtype);
1720 } else {
1721 printf ("(default)");
1722 }
1723 printf ("\n");
1724 hb = hb->parent;
1725 }
1726 printf ("\n");
1727 });
1728
1729 switch (s->state) {
1730
1731 #ifdef USE_FROM
1732 case CAMEL_MIME_PARSER_STATE_INITIAL:
1733 if (s->scan_from) {
1734 h = g_malloc0 (sizeof (*h));
1735 h->boundary = g_strdup ("From ");
1736 h->boundarylen = strlen (h->boundary);
1737 h->boundarylenfinal = h->boundarylen;
1738 h->from_line = g_byte_array_new ();
1739 folder_push_part (s, h);
1740 s->state = CAMEL_MIME_PARSER_STATE_PRE_FROM;
1741 goto scan_pre_from;
1742 } else {
1743 s->start_of_from = -1;
1744 goto scan_header;
1745 }
1746
1747 case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1748
1749 scan_pre_from:
1750 h = s->parts;
1751 do {
1752 hb = folder_scan_content (s, &state, databuffer, datalength);
1753 if (s->scan_pre_from && *datalength > 0) {
1754 d (printf ("got pre-from content %d bytes\n", *datalength));
1755 return;
1756 }
1757 } while (hb == h && *datalength > 0);
1758
1759 if (*datalength == 0 && hb == h) {
1760 d (printf ("found 'From '\n"));
1761 s->start_of_from = folder_tell (s);
1762 folder_scan_skip_line (s, h->from_line);
1763 h->savestate = CAMEL_MIME_PARSER_STATE_INITIAL;
1764 s->state = CAMEL_MIME_PARSER_STATE_FROM;
1765 } else {
1766 folder_pull_part (s);
1767 s->state = CAMEL_MIME_PARSER_STATE_EOF;
1768 }
1769 return;
1770 #else
1771 case CAMEL_MIME_PARSER_STATE_INITIAL:
1772 case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1773 #endif /* USE_FROM */
1774
1775 scan_header:
1776 case CAMEL_MIME_PARSER_STATE_FROM:
1777 s->start_of_headers = folder_tell (s);
1778 h = folder_scan_header (s, &state);
1779 #ifdef USE_FROM
1780 if (s->scan_from)
1781 h->savestate = CAMEL_MIME_PARSER_STATE_FROM_END;
1782 else
1783 #endif
1784 h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
1785
1786 /* FIXME: should this check for MIME-Version: 1.0 as well? */
1787
1788 type = CAMEL_MIME_PARSER_STATE_HEADER;
1789 if ((content = header_raw_find (&h->headers, "Content-Type", NULL))
1790 && (ct = camel_content_type_decode (content))) {
1791 if (!g_ascii_strcasecmp (ct->type, "multipart")) {
1792 if (!camel_content_type_is (ct, "multipart", "signed")
1793 && (bound = camel_content_type_param (ct, "boundary"))) {
1794 d (printf ("multipart, boundary = %s\n", bound));
1795 h->boundarylen = strlen (bound) + 2;
1796 h->boundarylenfinal = h->boundarylen + 2;
1797 boundary_len = h->boundarylen + 3;
1798 h->boundary = g_malloc (boundary_len);
1799 g_snprintf (h->boundary, boundary_len, "--%s--", bound);
1800 type = CAMEL_MIME_PARSER_STATE_MULTIPART;
1801 } else {
1802 /*camel_content_type_unref(ct);
1803 ct = camel_content_type_decode ("text/plain");*/
1804 /* We can't quite do this, as it will mess up all the offsets ... */
1805 /* camel_header_raw_replace(&h->headers, "Content-Type", "text/plain", offset); */
1806 /*g_warning("Multipart with no boundary, treating as text/plain");*/
1807 }
1808 } else if (!g_ascii_strcasecmp (ct->type, "message")) {
1809 if ((!g_ascii_strcasecmp (ct->subtype, "rfc2822") ||
1810 !g_ascii_strcasecmp (ct->subtype, "rfc822") ||
1811 !g_ascii_strcasecmp (ct->subtype, "global") ||
1812 !g_ascii_strcasecmp (ct->subtype, "news")) &&
1813 !part_is_encoded (&h->headers)) {
1814 type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1815 }
1816 }
1817 } else {
1818 /* make the default type for multipart/digest be message/rfc822 */
1819 if (s->parts &&
1820 camel_content_type_is (s->parts->content_type, "multipart", "digest") &&
1821 !part_is_encoded (&h->headers)) {
1822 ct = camel_content_type_decode ("message/rfc822");
1823 type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1824 d (printf ("parent was multipart/digest, autoupgrading to message/rfc822?\n"));
1825 /* maybe we should do this too?
1826 * header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/
1827 } else {
1828 ct = camel_content_type_decode ("text/plain");
1829 }
1830 }
1831 h->content_type = ct;
1832 folder_push_part (s, h);
1833 s->state = type;
1834 return;
1835
1836 case CAMEL_MIME_PARSER_STATE_HEADER:
1837 s->state = CAMEL_MIME_PARSER_STATE_BODY;
1838 /* coverity[fallthrough] */
1839 /* falls through */
1840
1841 case CAMEL_MIME_PARSER_STATE_BODY:
1842 h = s->parts;
1843 *datalength = 0;
1844 presize = SCAN_HEAD;
1845 f = s->filters;
1846
1847 do {
1848 hb = folder_scan_content (s, &state, databuffer, datalength);
1849
1850 d (printf ("\n\nOriginal content: '"));
1851 d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
1852 d (printf ("'\n"));
1853
1854 if (*datalength > 0) {
1855 while (f) {
1856 camel_mime_filter_filter (
1857 f->filter,
1858 *databuffer, *datalength, presize,
1859 databuffer, datalength, &presize);
1860 d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
1861 d (printf ("'\n"));
1862 f = f->next;
1863 }
1864 return;
1865 }
1866 } while (hb == h && *datalength > 0);
1867
1868 /* check for any filter completion data */
1869 while (f) {
1870 camel_mime_filter_complete (
1871 f->filter, *databuffer, *datalength, presize,
1872 databuffer, datalength, &presize);
1873 f = f->next;
1874 }
1875
1876 if (*datalength > 0)
1877 return;
1878
1879 s->state = CAMEL_MIME_PARSER_STATE_BODY_END;
1880 break;
1881
1882 case CAMEL_MIME_PARSER_STATE_MULTIPART:
1883 h = s->parts;
1884 /* This mess looks for the next boundary on this
1885 * level. Once it finds the last one, it keeps going,
1886 * looking for post-multipart content ('postface').
1887 * Because messages might have duplicate boundaries for
1888 * different parts, it makes sure it stops if its already
1889 * found an end boundary for this part. It handles
1890 * truncated and missing boundaries appropriately too. */
1891 seenlast = FALSE;
1892 do {
1893 do {
1894 hb = folder_scan_content (s, &state, databuffer, datalength);
1895 if (*datalength > 0) {
1896 /* instead of a new state, we'll just store it locally and provide
1897 * an accessor function */
1898 d (printf (
1899 "Multipart %s Content %p: '%.*s'\n",
1900 h->prestage > 0 ? "post" : "pre",
1901 h, *datalength, *databuffer));
1902 if (h->prestage > 0) {
1903 if (h->posttext == NULL)
1904 h->posttext = g_byte_array_new ();
1905 g_byte_array_append (h->posttext, (guint8 *) *databuffer, *datalength);
1906 } else {
1907 if (h->pretext == NULL)
1908 h->pretext = g_byte_array_new ();
1909 g_byte_array_append (h->pretext, (guint8 *) *databuffer, *datalength);
1910 }
1911 }
1912 } while (hb == h && *datalength > 0);
1913 h->prestage++;
1914 if (*datalength == 0 && hb == h && !seenlast) {
1915 d (printf ("got boundary: %s last=%d\n", hb->boundary, state));
1916 s->start_of_boundary = folder_tell (s);
1917 folder_scan_skip_line (s, NULL);
1918 if (!state) {
1919 s->state = CAMEL_MIME_PARSER_STATE_FROM;
1920 folder_scan_step (s, databuffer, datalength);
1921 s->parts->savestate = CAMEL_MIME_PARSER_STATE_MULTIPART; /* set return state for the new head part */
1922 return;
1923 } else
1924 seenlast = TRUE;
1925 } else {
1926 break;
1927 }
1928 } while (1);
1929
1930 s->state = CAMEL_MIME_PARSER_STATE_MULTIPART_END;
1931 break;
1932
1933 case CAMEL_MIME_PARSER_STATE_MESSAGE:
1934 s->state = CAMEL_MIME_PARSER_STATE_FROM;
1935 folder_scan_step (s, databuffer, datalength);
1936 s->parts->savestate = CAMEL_MIME_PARSER_STATE_MESSAGE_END;
1937 break;
1938
1939 case CAMEL_MIME_PARSER_STATE_FROM_END:
1940 case CAMEL_MIME_PARSER_STATE_BODY_END:
1941 case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1942 case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1943 s->state = s->parts->savestate;
1944 folder_pull_part (s);
1945 if (s->state & CAMEL_MIME_PARSER_STATE_END)
1946 return;
1947 goto tail_recurse;
1948
1949 case CAMEL_MIME_PARSER_STATE_EOF:
1950 return;
1951
1952 default:
1953 g_warning ("Invalid state in camel-mime-parser: %u", s->state);
1954 break;
1955 }
1956
1957 return;
1958 }
1959
1960 /* drops the current state back one */
1961 static void
folder_scan_drop_step(struct _header_scan_state * s)1962 folder_scan_drop_step (struct _header_scan_state *s)
1963 {
1964 switch (s->state) {
1965 case CAMEL_MIME_PARSER_STATE_EOF:
1966 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1967 case CAMEL_MIME_PARSER_STATE_INITIAL:
1968 return;
1969
1970 case CAMEL_MIME_PARSER_STATE_FROM:
1971 case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1972 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1973 folder_pull_part (s);
1974 return;
1975
1976 case CAMEL_MIME_PARSER_STATE_MESSAGE:
1977 case CAMEL_MIME_PARSER_STATE_HEADER:
1978 case CAMEL_MIME_PARSER_STATE_MULTIPART:
1979
1980 case CAMEL_MIME_PARSER_STATE_FROM_END:
1981 case CAMEL_MIME_PARSER_STATE_BODY_END:
1982 case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1983 case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1984
1985 s->state = s->parts->savestate;
1986 folder_pull_part (s);
1987 if (s->state & CAMEL_MIME_PARSER_STATE_END) {
1988 s->state &= ~CAMEL_MIME_PARSER_STATE_END;
1989 }
1990 return;
1991 default:
1992 /* FIXME: not sure if this is entirely right */
1993 break;
1994 }
1995 }
1996
1997 static CamelHeaderRaw *
header_raw_find_node(CamelHeaderRaw ** list,const gchar * name)1998 header_raw_find_node (CamelHeaderRaw **list,
1999 const gchar *name)
2000 {
2001 CamelHeaderRaw *l;
2002
2003 l = *list;
2004 while (l) {
2005 if (!g_ascii_strcasecmp (l->name, name))
2006 break;
2007 l = l->next;
2008 }
2009 return l;
2010 }
2011
2012 static const gchar *
header_raw_find(CamelHeaderRaw ** list,const gchar * name,gint * offset)2013 header_raw_find (CamelHeaderRaw **list,
2014 const gchar *name,
2015 gint *offset)
2016 {
2017 CamelHeaderRaw *l;
2018
2019 l = header_raw_find_node (list, name);
2020 if (l) {
2021 if (offset)
2022 *offset = l->offset;
2023 return l->value;
2024 } else
2025 return NULL;
2026 }
2027
2028 #ifndef MEMPOOL
2029 static void
header_raw_free(CamelHeaderRaw * l)2030 header_raw_free (CamelHeaderRaw *l)
2031 {
2032 g_free (l->name);
2033 g_free (l->value);
2034 g_free (l);
2035 }
2036
2037 static void
header_raw_clear(CamelHeaderRaw ** list)2038 header_raw_clear (CamelHeaderRaw **list)
2039 {
2040 CamelHeaderRaw *l, *n;
2041 l = *list;
2042 while (l) {
2043 n = l->next;
2044 header_raw_free (l);
2045 l = n;
2046 }
2047 *list = NULL;
2048 }
2049 #endif
2050
2051 #ifdef STANDALONE
/*
 * Standalone test driver (only built when STANDALONE is defined).
 *
 * For every path given on the command line: open the file, create a scanner
 * on its descriptor, then call folder_scan_step() until the scanner reports
 * CAMEL_MIME_PARSER_STATE_EOF, printing each state reached and, for the body
 * states, the data returned by the step.
 *
 * Exits with status 1 if a file cannot be opened; returns 0 otherwise.
 */
gint main (gint argc, gchar **argv)
{
	gint fd;
	struct _header_scan_state *s;
	gchar *data;
	gsize len;
	const gchar *name = "/tmp/evmail/Inbox";
	struct _header_scan_stack *h;	/* only referenced by the disabled (#if 0) block below */
	gint i;
	gint attach = 0;	/* running counter used to build "attach.<file>.<n>" names */

	if (argc == 2)
		name = argv[1];

	/* NOTE(review): with no arguments the loop below never runs (it starts
	 * at i = 1), so the default path is printed here but never opened. */
	printf ("opening: %s", name);

	for (i = 1; i < argc; i++) {
		const gchar *encoding = NULL, *charset = NULL;
		gchar *attachname;

		name = argv[i];
		printf ("opening: %s", name);

		fd = g_open (name, O_RDONLY | O_BINARY, 0);
		if (fd == -1) {
			perror ("Cannot open mailbox");
			exit (1);
		}
		s = folder_scan_init ();
		folder_scan_init_with_fd (s, fd);
		s->scan_from = FALSE;
#if 0
		h = g_malloc0 (sizeof (*h));
		h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
		folder_push_part (s, h);
#endif
		while (s->state != CAMEL_MIME_PARSER_STATE_EOF) {
			folder_scan_step (s, &data, &len);
			printf ("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
			switch (s->state) {
			case CAMEL_MIME_PARSER_STATE_HEADER:
				/* Remember the charset only when it is present and not us-ascii. */
				if (s->parts->content_type
				    && (charset = camel_content_type_param (s->parts->content_type, "charset"))) {
					if (g_ascii_strcasecmp (charset, "us-ascii")) {
#if 0
						folder_push_filter_charset (s, "UTF-8", charset);
#endif
					} else {
						charset = NULL;
					}
				} else {
					charset = NULL;
				}

				encoding = header_raw_find (&s->parts->headers, "Content-transfer-encoding", NULL);
				/* header_raw_find() returns NULL on a miss, and passing NULL
				 * for %s is undefined behaviour, so substitute a marker. */
				printf ("encoding = '%s'\n", encoding ? encoding : "(null)");
				/* NOTE(review): the literals compared against begin with a
				 * space; presumably the raw value keeps the blank after the
				 * ':' -- confirm against the header scanner. */
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("adding base64 filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 0);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("adding quoted-printable filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 1);
#endif
				}

				break;
			case CAMEL_MIME_PARSER_STATE_BODY:
				/* len is a gsize, but both %d and the '*' precision take an
				 * int argument, hence the explicit casts. */
				printf ("got body %d '%.*s'\n", (gint) len, (gint) len, data);
				break;
			case CAMEL_MIME_PARSER_STATE_BODY_END:
				printf ("end body %d '%.*s'\n", (gint) len, (gint) len, data);
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (charset) {
#if 0
					folder_filter_pull (s);
#endif
					charset = NULL;
				}
				/* Reset the per-part state before the next part's headers. */
				encoding = NULL;
				break;
			default:
				break;
			}
		}
		folder_scan_close (s);
		close (fd);
	}
	return 0;
}
2169
2170 #endif /* STANDALONE */
2171
2172