1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*  GMime
3  *  Copyright (C) 2000-2009 Jeffrey Stedfast
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free
17  *  Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
18  *  02110-1301, USA.
19  */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "gmime-common.h"
31 #include "gmime-filter-enriched.h"
32 
33 /* text/enriched is rfc1896 */
34 
35 
/**
 * SECTION: gmime-filter-enriched
 * @title: GMimeFilterEnriched
 * @short_description: Convert text/enriched or text/richtext to HTML
 * @see_also: #GMimeFilter
 *
 * A #GMimeFilter used for converting text/enriched or text/richtext to HTML.
 **/
44 
45 
/* Signature shared by the <param> value parsers below: each one receives
 * the raw parameter bytes (@inptr, @inlen; not nul-terminated) and
 * returns a newly allocated, validated string. */
typedef char * (*EnrichedParamParser) (const char *inptr, size_t inlen);

/* forward declarations so the parsers can be referenced by enriched_tags[] */
static char *param_parse_font (const char *inptr, size_t inlen);
static char *param_parse_lang (const char *inptr, size_t inlen);
static char *param_parse_colour (const char *inptr, size_t inlen);
51 
52 static struct {
53 	char *enriched;
54 	char *html;
55 	gboolean needs_param;
56 	EnrichedParamParser parse_param; /* parses *and* validates the input */
57 } enriched_tags[] = {
58 	{ "bold",        "<b>",                 FALSE, NULL               },
59 	{ "/bold",       "</b>",                FALSE, NULL               },
60 	{ "italic",      "<i>",                 FALSE, NULL               },
61 	{ "/italic",     "</i>",                FALSE, NULL               },
62 	{ "fixed",       "<tt>",                FALSE, NULL               },
63 	{ "/fixed",      "</tt>",               FALSE, NULL               },
64 	{ "smaller",     "<font size=-1>",      FALSE, NULL               },
65 	{ "/smaller",    "</font>",             FALSE, NULL               },
66 	{ "bigger",      "<font size=+1>",      FALSE, NULL               },
67 	{ "/bigger",     "</font>",             FALSE, NULL               },
68 	{ "underline",   "<u>",                 FALSE, NULL               },
69 	{ "/underline",  "</u>",                FALSE, NULL               },
70 	{ "center",      "<p align=center>",    FALSE, NULL               },
71 	{ "/center",     "</p>",                FALSE, NULL               },
72 	{ "flushleft",   "<p align=left>",      FALSE, NULL               },
73 	{ "/flushleft",  "</p>",                FALSE, NULL               },
74 	{ "flushright",  "<p align=right>",     FALSE, NULL               },
75 	{ "/flushright", "</p>",                FALSE, NULL               },
76 	{ "excerpt",     "<blockquote>",        FALSE, NULL               },
77 	{ "/excerpt",    "</blockquote>",       FALSE, NULL               },
78 	{ "paragraph",   "<p>",                 FALSE, NULL               },
79 	{ "signature",   "<address>",           FALSE, NULL               },
80 	{ "/signature",  "</address>",          FALSE, NULL               },
81 	{ "comment",     "<!-- ",               FALSE, NULL               },
82 	{ "/comment",    " -->",                FALSE, NULL               },
83 	{ "np",          "<hr>",                FALSE, NULL               },
84 	{ "fontfamily",  "<font face=\"%s\">",  TRUE,  param_parse_font   },
85 	{ "/fontfamily", "</font>",             FALSE, NULL               },
86 	{ "color",       "<font color=\"%s\">", TRUE,  param_parse_colour },
87 	{ "/color",      "</font>",             FALSE, NULL               },
88 	{ "lang",        "<span lang=\"%s\">",  TRUE,  param_parse_lang   },
89 	{ "/lang",       "</span>",             FALSE, NULL               },
90 
91 	/* don't handle this tag yet... */
92 	{ "paraindent",  "<!-- ",               /* TRUE */ FALSE, NULL    },
93 	{ "/paraindent", " -->",                FALSE, NULL               },
94 
95 	/* as soon as we support all the tags that can have a param
96 	 * tag argument, these should be unnecessary, but we'll keep
97 	 * them anyway just in case? */
98 	{ "param",       "<!-- ",               FALSE, NULL               },
99 	{ "/param",      " -->",                FALSE, NULL               },
100 };
101 
102 #define NUM_ENRICHED_TAGS (sizeof (enriched_tags) / sizeof (enriched_tags[0]))
103 
104 static GHashTable *enriched_hash = NULL;
105 
106 
107 static void g_mime_filter_enriched_class_init (GMimeFilterEnrichedClass *klass);
108 static void g_mime_filter_enriched_init       (GMimeFilterEnriched *filter, GMimeFilterEnrichedClass *klass);
109 static void g_mime_filter_enriched_finalize   (GObject *object);
110 
111 static GMimeFilter *filter_copy (GMimeFilter *filter);
112 static void filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
113 			   char **out, size_t *outlen, size_t *outprespace);
114 static void filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
115 			     char **out, size_t *outlen, size_t *outprespace);
116 static void filter_reset (GMimeFilter *filter);
117 
118 
119 static GMimeFilterClass *parent_class = NULL;
120 
121 
122 GType
g_mime_filter_enriched_get_type(void)123 g_mime_filter_enriched_get_type (void)
124 {
125 	static GType type = 0;
126 
127 	if (!type) {
128 		static const GTypeInfo info = {
129 			sizeof (GMimeFilterEnrichedClass),
130 			NULL, /* base_class_init */
131 			NULL, /* base_class_finalize */
132 			(GClassInitFunc) g_mime_filter_enriched_class_init,
133 			NULL, /* class_finalize */
134 			NULL, /* class_data */
135 			sizeof (GMimeFilterEnriched),
136 			0,    /* n_preallocs */
137 			(GInstanceInitFunc) g_mime_filter_enriched_init,
138 		};
139 
140 		type = g_type_register_static (GMIME_TYPE_FILTER, "GMimeFilterEnriched", &info, 0);
141 	}
142 
143 	return type;
144 }
145 
146 static void
g_mime_filter_enriched_class_init(GMimeFilterEnrichedClass * klass)147 g_mime_filter_enriched_class_init (GMimeFilterEnrichedClass *klass)
148 {
149 	GObjectClass *object_class = G_OBJECT_CLASS (klass);
150 	GMimeFilterClass *filter_class = GMIME_FILTER_CLASS (klass);
151 	guint i;
152 
153 	parent_class = g_type_class_ref (GMIME_TYPE_FILTER);
154 
155 	object_class->finalize = g_mime_filter_enriched_finalize;
156 
157 	filter_class->copy = filter_copy;
158 	filter_class->reset = filter_reset;
159 	filter_class->filter = filter_filter;
160 	filter_class->complete = filter_complete;
161 
162 	if (!enriched_hash) {
163 		enriched_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
164 		for (i = 0; i < NUM_ENRICHED_TAGS; i++)
165 			g_hash_table_insert (enriched_hash, enriched_tags[i].enriched,
166 					     enriched_tags[i].html);
167 	}
168 }
169 
170 static void
g_mime_filter_enriched_init(GMimeFilterEnriched * filter,GMimeFilterEnrichedClass * klass)171 g_mime_filter_enriched_init (GMimeFilterEnriched *filter, GMimeFilterEnrichedClass *klass)
172 {
173 	filter->flags = 0;
174 	filter->nofill = 0;
175 }
176 
177 static void
g_mime_filter_enriched_finalize(GObject * object)178 g_mime_filter_enriched_finalize (GObject *object)
179 {
180 	G_OBJECT_CLASS (parent_class)->finalize (object);
181 }
182 
183 
184 static GMimeFilter *
filter_copy(GMimeFilter * filter)185 filter_copy (GMimeFilter *filter)
186 {
187 	GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
188 
189 	return g_mime_filter_enriched_new (enriched->flags);
190 }
191 
#if 0
/* Currently compiled out.  Looks @tag up (case-insensitively) in
 * enriched_tags[] and reports the needs_param flag of the matching row,
 * or FALSE if the tag is unknown. */
static gboolean
enriched_tag_needs_param (const char *tag)
{
	int idx = 0;

	while (idx < NUM_ENRICHED_TAGS) {
		if (g_ascii_strcasecmp (tag, enriched_tags[idx].enriched) == 0)
			return enriched_tags[idx].needs_param;

		idx++;
	}

	return FALSE;
}
#endif
205 
206 static gboolean
html_tag_needs_param(const char * tag)207 html_tag_needs_param (const char *tag)
208 {
209 	return strstr (tag, "%s") != NULL;
210 }
211 
/* colour names that param_parse_colour() recognises directly */
static const char *valid_colours[] = {
	"red",
	"green",
	"blue",
	"yellow",
	"cyan",
	"magenta",
	"black",
	"white"
};

/* number of entries in valid_colours[] */
#define NUM_VALID_COLOURS  (sizeof (valid_colours) / sizeof (valid_colours[0]))
217 
218 static char *
param_parse_colour(const char * inptr,size_t inlen)219 param_parse_colour (const char *inptr, size_t inlen)
220 {
221 	const char *inend, *end;
222 	guint32 rgb = 0;
223 	guint v, i;
224 
225 	for (i = 0; i < NUM_VALID_COLOURS; i++) {
226 		if (!g_ascii_strncasecmp (inptr, valid_colours[i], inlen))
227 			return g_strdup (valid_colours[i]);
228 	}
229 
230 	/* check for numeric r/g/b in the format: ####,####,#### */
231 	if (inptr[4] != ',' || inptr[9] != ',') {
232 		/* okay, mailer must have used a string name that
233 		 * rfc1896 did not specify? do some simple scanning
234 		 * action, a colour name MUST be [a-zA-Z] */
235 		end = inptr;
236 		inend = inptr + inlen;
237 		while (end < inend && ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z')))
238 			end++;
239 
240 		return g_strndup (inptr, end - inptr);
241 	}
242 
243 	for (i = 0; i < 3; i++) {
244 		v = strtoul (inptr, (char **) &end, 16);
245 		if (end != inptr + 4)
246 			goto invalid_format;
247 
248 		v >>= 8;
249 		rgb = (rgb << 8) | (v & 0xff);
250 
251 		inptr += 5;
252 	}
253 
254 	return g_strdup_printf ("#%.6X", rgb);
255 
256  invalid_format:
257 
258 	/* default colour? */
259 	return g_strdup ("black");
260 }
261 
/* Parses the argument of a <fontfamily> command.
 *
 * Returns a newly allocated copy of the leading part of @fontfamily
 * (at most @inlen bytes), cut off at the first '"', '<' or '>' so the
 * value cannot break out of the HTML attribute it is placed in. */
static char *
param_parse_font (const char *fontfamily, size_t inlen)
{
	size_t accepted;

	/* don't allow any of '"', '<', nor '>' */
	for (accepted = 0; accepted < inlen; accepted++) {
		char c = fontfamily[accepted];

		if (c == '"' || c == '<' || c == '>')
			break;
	}

	return g_strndup (fontfamily, accepted);
}
274 
/* Parses the argument of a <lang> command.
 *
 * Returns a newly allocated copy of the leading part of @lang (at most
 * @inlen bytes), cut off at the first '"', '<' or '>' so the value
 * cannot break out of the HTML attribute it is placed in. */
static char *
param_parse_lang (const char *lang, size_t inlen)
{
	size_t accepted;

	/* don't allow any of '"', '<', nor '>' */
	for (accepted = 0; accepted < inlen; accepted++) {
		char c = lang[accepted];

		if (c == '"' || c == '<' || c == '>')
			break;
	}

	return g_strndup (lang, accepted);
}
287 
288 static char *
param_parse(const char * enriched,const char * inptr,size_t inlen)289 param_parse (const char *enriched, const char *inptr, size_t inlen)
290 {
291 	guint i;
292 
293 	for (i = 0; i < NUM_ENRICHED_TAGS; i++) {
294 		if (!g_ascii_strcasecmp (enriched, enriched_tags[i].enriched))
295 			return enriched_tags[i].parse_param (inptr, inlen);
296 	}
297 
298 	g_assert_not_reached ();
299 
300 	return NULL;
301 }
302 
303 #define IS_RICHTEXT GMIME_FILTER_ENRICHED_IS_RICHTEXT
304 
305 static void
enriched_to_html(GMimeFilter * filter,char * in,size_t inlen,size_t prespace,char ** out,size_t * outlen,size_t * outprespace,gboolean flush)306 enriched_to_html (GMimeFilter *filter, char *in, size_t inlen, size_t prespace,
307 		  char **out, size_t *outlen, size_t *outprespace, gboolean flush)
308 {
309 	GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
310 	const char *tag, *inend, *outend;
311 	register const char *inptr;
312 	register char *outptr;
313 
314 	g_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
315 
316 	inptr = in;
317 	inend = in + inlen;
318 	outptr = filter->outbuf;
319 	outend = filter->outbuf + filter->outsize;
320 
321  retry:
322 	do {
323 		while (inptr < inend && outptr < outend && !strchr (" <>&\n", *inptr))
324 			*outptr++ = *inptr++;
325 
326 		if (outptr == outend)
327 			goto backup;
328 
329 		if ((inptr + 1) >= inend)
330 			break;
331 
332 		switch (*inptr++) {
333 		case ' ':
334 			while (inptr < inend && (outptr + 7) < outend && *inptr == ' ') {
335 				memcpy (outptr, "&nbsp;", 6);
336 				outptr += 6;
337 				inptr++;
338 			}
339 
340 			if (outptr < outend)
341 				*outptr++ = ' ';
342 
343 			break;
344 		case '\n':
345 			if (!(enriched->flags & IS_RICHTEXT)) {
346 				/* text/enriched */
347 				if (enriched->nofill > 0) {
348 					if ((outptr + 4) < outend) {
349 						memcpy (outptr, "<br>", 4);
350 						outptr += 4;
351 					} else {
352 						inptr--;
353 						goto backup;
354 					}
355 				} else if (*inptr == '\n') {
356 					if ((outptr + 4) >= outend) {
357 						inptr--;
358 						goto backup;
359 					}
360 
361 					while (inptr < inend && (outptr + 4) < outend && *inptr == '\n') {
362 						memcpy (outptr, "<br>", 4);
363 						outptr += 4;
364 						inptr++;
365 					}
366 				} else {
367 					*outptr++ = ' ';
368 				}
369 			} else {
370 				/* text/richtext */
371 				*outptr++ = ' ';
372 			}
373 			break;
374 		case '>':
375 			if ((outptr + 4) < outend) {
376 				memcpy (outptr, "&gt;", 4);
377 				outptr += 4;
378 			} else {
379 				inptr--;
380 				goto backup;
381 			}
382 			break;
383 		case '&':
384 			if ((outptr + 5) < outend) {
385 				memcpy (outptr, "&amp;", 5);
386 				outptr += 5;
387 			} else {
388 				inptr--;
389 				goto backup;
390 			}
391 			break;
392 		case '<':
393 			if (!(enriched->flags & IS_RICHTEXT)) {
394 				/* text/enriched */
395 				if (*inptr == '<') {
396 					if ((outptr + 4) < outend) {
397 						memcpy (outptr, "&lt;", 4);
398 						outptr += 4;
399 						inptr++;
400 						break;
401 					} else {
402 						inptr--;
403 						goto backup;
404 					}
405 				}
406 			} else {
407 				/* text/richtext */
408 				if ((inend - inptr) >= 3 && (outptr + 4) < outend) {
409 					if (strncmp (inptr, "lt>", 3) == 0) {
410 						memcpy (outptr, "&lt;", 4);
411 						outptr += 4;
412 						inptr += 3;
413 						break;
414 					} else if (strncmp (inptr, "nl>", 3) == 0) {
415 						memcpy (outptr, "<br>", 4);
416 						outptr += 4;
417 						inptr += 3;
418 						break;
419 					}
420 				} else {
421 					inptr--;
422 					goto backup;
423 				}
424 			}
425 
426 			tag = inptr;
427 			while (inptr < inend && *inptr != '>')
428 				inptr++;
429 
430 			if (inptr == inend) {
431 				inptr = tag - 1;
432 				goto need_input;
433 			}
434 
435 			if (!g_ascii_strncasecmp (tag, "nofill>", 7)) {
436 				if ((outptr + 5) < outend) {
437 					enriched->nofill++;
438 				} else {
439 					inptr = tag - 1;
440 					goto backup;
441 				}
442 			} else if (!g_ascii_strncasecmp (tag, "/nofill>", 8)) {
443 				if ((outptr + 6) < outend) {
444 					enriched->nofill--;
445 				} else {
446 					inptr = tag - 1;
447 					goto backup;
448 				}
449 			} else {
450 				const char *html_tag;
451 				char *enriched_tag;
452 				size_t len;
453 
454 				len = inptr - tag;
455 				enriched_tag = g_alloca (len + 1);
456 				memcpy (enriched_tag, tag, len);
457 				enriched_tag[len] = '\0';
458 
459 				html_tag = g_hash_table_lookup (enriched_hash, enriched_tag);
460 
461 				if (html_tag) {
462 					if (html_tag_needs_param (html_tag)) {
463 						const char *start;
464 						char *param;
465 
466 						while (inptr < inend && *inptr != '<')
467 							inptr++;
468 
469 #define PARAM_TAG_MIN_LEN  (sizeof ("<param>") + sizeof ("</param>") - 1)
470 						if (inptr == inend || (inend - inptr) <= PARAM_TAG_MIN_LEN) {
471 							inptr = tag - 1;
472 							goto need_input;
473 						}
474 
475 						if (g_ascii_strncasecmp (inptr, "<param>", 7) != 0) {
476 							/* ignore the enriched command tag... */
477 							inptr -= 1;
478 							goto loop;
479 						}
480 
481 						inptr += 7;
482 						start = inptr;
483 
484 						while (inptr < inend && *inptr != '<')
485 							inptr++;
486 
487 						if (inptr == inend || (inend - inptr) <= 8) {
488 							inptr = tag - 1;
489 							goto need_input;
490 						}
491 
492 						if (g_ascii_strncasecmp (inptr, "</param>", 8) != 0) {
493 							/* ignore the enriched command tag... */
494 							inptr += 7;
495 							goto loop;
496 						}
497 
498 						len = inptr - start;
499 						param = param_parse (enriched_tag, start, len);
500 						len = strlen (param);
501 
502 						inptr += 7;
503 
504 						len += strlen (html_tag);
505 
506 						if ((outptr + len) < outend) {
507 							outptr += g_snprintf (outptr, len, html_tag, param);
508 							g_free (param);
509 						} else {
510 							g_free (param);
511 							inptr = tag - 1;
512 							goto backup;
513 						}
514 					} else {
515 						len = strlen (html_tag);
516 						if ((outptr + len) < outend) {
517 							memcpy (outptr, html_tag, len);
518 							outptr += len;
519 						} else {
520 							inptr = tag - 1;
521 							goto backup;
522 						}
523 					}
524 				}
525 			}
526 
527 		loop:
528 			inptr++;
529 			break;
530 		default:
531 			break;
532 		}
533 	} while (inptr < inend);
534 
535  need_input:
536 
537 	/* the reason we ignore @flush here is because if there isn't
538            enough input to parse a tag, then there's nothing we can
539            do. */
540 
541 	if (inptr < inend)
542 		g_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
543 
544 	*out = filter->outbuf;
545 	*outlen = outptr - filter->outbuf;
546 	*outprespace = filter->outpre;
547 
548 	return;
549 
550  backup:
551 
552 	if (flush) {
553 		size_t offset, grow;
554 
555 		grow = (inend - inptr) * 2 + 20;
556 		offset = outptr - filter->outbuf;
557 		g_mime_filter_set_size (filter, filter->outsize + grow, TRUE);
558 		outend = filter->outbuf + filter->outsize;
559 		outptr = filter->outbuf + offset;
560 
561 		goto retry;
562 	} else {
563 		g_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
564 	}
565 
566 	*out = filter->outbuf;
567 	*outlen = outptr - filter->outbuf;
568 	*outprespace = filter->outpre;
569 }
570 
571 static void
filter_filter(GMimeFilter * filter,char * in,size_t len,size_t prespace,char ** out,size_t * outlen,size_t * outprespace)572 filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
573 	       char **out, size_t *outlen, size_t *outprespace)
574 {
575 	enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, FALSE);
576 }
577 
578 static void
filter_complete(GMimeFilter * filter,char * in,size_t len,size_t prespace,char ** out,size_t * outlen,size_t * outprespace)579 filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
580 		 char **out, size_t *outlen, size_t *outprespace)
581 {
582 	enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, TRUE);
583 }
584 
585 static void
filter_reset(GMimeFilter * filter)586 filter_reset (GMimeFilter *filter)
587 {
588 	GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
589 
590 	enriched->nofill = 0;
591 }
592 
593 
594 /**
595  * g_mime_filter_enriched_new:
596  * @flags: flags
597  *
598  * Creates a new GMimeFilterEnriched object.
599  *
600  * Returns a new GMimeFilter object.
601  **/
602 GMimeFilter *
g_mime_filter_enriched_new(guint32 flags)603 g_mime_filter_enriched_new (guint32 flags)
604 {
605 	GMimeFilterEnriched *new;
606 
607 	new = (GMimeFilterEnriched *) g_object_new (GMIME_TYPE_FILTER_ENRICHED, NULL);
608 	new->flags = flags;
609 
610 	return (GMimeFilter *) new;
611 }
612