1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*  This file is part of the GtkHTML library.
3  *
4  *  Copyright (C) 1998 World Wide Web Consortium
5  *  Copyright (C) 2000 Helix Code, Inc.
6  *
7  *  This library is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU Library General Public
9  *  License as published by the Free Software Foundation; either
10  *  version 2 of the License, or (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  *  Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Library General Public License
18  *  along with this library; see the file COPYING.LIB.  If not, write to
19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  *  Boston, MA 02110-1301, USA.
21  *
22  *  Author: Ettore Perazzoli <ettore@helixcode.com>
23  *  `encode_entities ()' adapted from gnome-xml by Daniel Veillard
24  *  <Daniel.Veillard@w3.org>.
25 */
26 
27 #include <string.h>
28 
29 #include "config.h"
30 #include "htmlcluev.h"
31 #include "htmlcolor.h"
32 #include "htmlengine.h"
33 #include "htmlimage.h"
34 #include "htmlentity.h"
35 #include "htmlengine-save.h"
36 #include "htmlsettings.h"
37 
38 #include "gtkhtmldebug.h"
39 
/* &# = 2 characters
 * at most 10 characters from gunichar = guint32 (0 to 4294967295)
 * ;  = 1 character
 * \0 = 1 character
 */
45 #define HTML_ENTITIES_MAX_LENGTH	14
46 
47 
/* This routine was originally written by Daniel Veillard, (C) 1998 World Wide
 * Web Consortium.  */
50 gchar *
html_encode_entities(const gchar * input,guint len,guint * encoded_len_return)51 html_encode_entities (const gchar *input,
52                       guint len,
53                       guint *encoded_len_return)
54 {
55 	gunichar uc;
56 	const gchar *p;
57 	guchar *buffer = NULL;
58 	guchar *out = NULL;
59 	gint buffer_size = 0;
60 	guint count;
61 
62 	/* Allocate an translation buffer.  */
63 	buffer_size = 1000;
64 	buffer      = g_malloc (buffer_size);
65 
66 	out   = buffer;
67 	p     = input;
68 	count = 0;
69 
70 	while (p && *p && count < len) {
71 		if (out - buffer > buffer_size - 100) {
72 			gint index = out - buffer;
73 
74 			buffer_size *= 2;
75 			buffer = g_realloc (buffer, buffer_size);
76 			out = &buffer[index];
77 		}
78 		uc = g_utf8_get_char (p);
79 
80 		/* By default one have to encode at least '<', '>', '"' and '&'.  */
81 
82 		if (uc == '<') {
83 			*out++ = '&';
84 			*out++ = 'l';
85 			*out++ = 't';
86 			*out++ = ';';
87 		} else if (uc == '>') {
88 			*out++ = '&';
89 			*out++ = 'g';
90 			*out++ = 't';
91 			*out++ = ';';
92 		} else if (uc == '&') {
93 			*out++ = '&';
94 			*out++ = 'a';
95 			*out++ = 'm';
96 			*out++ = 'p';
97 			*out++ = ';';
98 		} else if (uc == '"') {
99 			*out++ = '&';
100 			*out++ = 'q';
101 			*out++ = 'u';
102 			*out++ = 'o';
103 			*out++ = 't';
104 			*out++ = ';';
105 		} else if (uc == ENTITY_NBSP) {
106 			*out++ = '&';
107 			*out++ = 'n';
108 			*out++ = 'b';
109 			*out++ = 's';
110 			*out++ = 'p';
111 			*out++ = ';';
112 		} else if (((uc >= 0x20) && (uc < 0x80))
113 			   || (uc == '\n') || (uc == '\r') || (uc == '\t')) {
114 			/* Default case, just copy. */
115 			*out++ = uc;
116 		} else {
117 			gchar buf[HTML_ENTITIES_MAX_LENGTH], *ptr;
118 
119 			g_snprintf (buf, HTML_ENTITIES_MAX_LENGTH, "&#%d;", uc);
120 
121 			ptr = buf;
122 			while (*ptr != 0)
123 				*out++ = *ptr++;
124 		}
125 
126 		count++;
127 		p = g_utf8_next_char (p);
128 	}
129 
130 	*out = 0;
131 	if (encoded_len_return)
132 		*encoded_len_return = out - buffer;
133 
134 	return (gchar *) buffer;
135 }
136 
137 gboolean
html_engine_save_encode(HTMLEngineSaveState * state,const gchar * buffer,guint length)138 html_engine_save_encode (HTMLEngineSaveState *state,
139                          const gchar *buffer,
140                          guint length)
141 {
142 	gchar *encoded_buffer;
143 	guint encoded_length;
144 	gboolean success;
145 
146 	g_return_val_if_fail (state != NULL, FALSE);
147 	g_return_val_if_fail (buffer != NULL, FALSE);
148 
149 	if (length == 0)
150 		return TRUE;
151 
152 	encoded_buffer = html_encode_entities (buffer, length, &encoded_length);
153 	success = state->receiver (state->engine, encoded_buffer, encoded_length, state->user_data);
154 
155 	g_free (encoded_buffer);
156 	return success;
157 }
158 
159 gboolean
html_engine_save_encode_string(HTMLEngineSaveState * state,const gchar * s)160 html_engine_save_encode_string (HTMLEngineSaveState *state,
161                                 const gchar *s)
162 {
163 	guint len;
164 
165 	g_return_val_if_fail (state != NULL, FALSE);
166 	g_return_val_if_fail (s != NULL, FALSE);
167 
168 	len = strlen (s);
169 
170 	return html_engine_save_encode (state, s, len);
171 }
172 
173 gboolean
html_engine_save_output_stringv(HTMLEngineSaveState * state,const gchar * format,va_list ap)174 html_engine_save_output_stringv (HTMLEngineSaveState *state,
175                                  const gchar *format,
176                                  va_list ap)
177 {
178 	gchar *string;
179 	gboolean retval;
180 
181 	string = g_strdup_vprintf (format, ap);
182 	retval = state->receiver (state->engine, string, strlen (string), state->user_data);
183 	g_free (string);
184 
185 	return retval;
186 }
187 
188 gboolean
html_engine_save_output_string(HTMLEngineSaveState * state,const gchar * format,...)189 html_engine_save_output_string (HTMLEngineSaveState *state,
190                                 const gchar *format,
191                                 ...)
192 {
193   va_list args;
194   gboolean retval;
195 
196   g_return_val_if_fail (format != NULL, FALSE);
197   g_return_val_if_fail (state != NULL, FALSE);
198 
199   va_start (args, format);
200   retval = html_engine_save_output_stringv (state, format, args);
201   va_end (args);
202 
203   return retval;
204 }
205 
206 gboolean
html_engine_save_output_buffer(HTMLEngineSaveState * state,const gchar * buffer,gint bytes)207 html_engine_save_output_buffer (HTMLEngineSaveState *state,
208                                 const gchar *buffer,
209                                 gint bytes)
210 {
211 	if (bytes == -1)
212 		bytes = strlen (buffer);
213 	return state->receiver (state->engine, buffer, bytes, state->user_data);
214 }
215 
216 gboolean
html_engine_save_delims_and_vals(HTMLEngineSaveState * state,const gchar * first,...)217 html_engine_save_delims_and_vals (HTMLEngineSaveState *state,
218                                   const gchar *first,
219                                   ...)
220 {
221   va_list args;
222   gboolean retval;
223   const gchar *value, *after;
224 
225   g_return_val_if_fail (state != NULL, FALSE);
226 
227   retval = html_engine_save_output_buffer (state, first, -1);
228   va_start (args, first);
229   while (retval && (value = va_arg (args, const gchar *)) != NULL) {
230     after = va_arg (args, const gchar *);
231     retval = html_engine_save_encode_string (state, value)
232 	    && html_engine_save_output_buffer (state, after, -1);
233   }
234   va_end (args);
235 
236   return retval;
237 }
238 
239 
240 static gchar *
color_to_string(const gchar * s,HTMLColor * c)241 color_to_string (const gchar *s,
242                  HTMLColor *c)
243 {
244 	gchar color[20];
245 
246 	g_snprintf (color, 20, " %s=\"#%02x%02x%02x\"", s, c->color.red >> 8, c->color.green >> 8, c->color.blue >> 8);
247 	return g_strdup (color);
248 }
249 
250 static gchar *
get_body(HTMLEngine * e)251 get_body (HTMLEngine *e)
252 {
253 	HTMLColorSet *cset;
254 	gchar *body;
255 	gchar *text;
256 	gchar *bg;
257 	gchar *bg_image;
258 	gchar *link;
259 	gchar *lm, *rm, *tm, *bm;
260 	gchar *url = NULL;
261 
262 	cset = e->settings->color_set;
263 	text = (cset->changed[HTMLTextColor]) ? color_to_string ("TEXT", cset->color[HTMLTextColor]) : g_strdup ("");
264 	link = (cset->changed[HTMLLinkColor]) ? color_to_string ("LINK", cset->color[HTMLLinkColor]) : g_strdup ("");
265 	bg   = (cset->changed[HTMLBgColor]) ? color_to_string ("BGCOLOR", cset->color[HTMLBgColor]) : g_strdup ("");
266 	bg_image = e->bgPixmapPtr ? g_strdup_printf (" BACKGROUND=\"%s\"",
267 						     url = html_image_resolve_image_url
268 						     (e->widget, ((HTMLImagePointer *) e->bgPixmapPtr)->url))
269 		: g_strdup ("");
270 	g_free (url);
271 
272 	lm = e->leftBorder != LEFT_BORDER ? g_strdup_printf (" LEFTMARGIN=\"%d\"", e->leftBorder) : g_strdup ("");
273 	rm = e->rightBorder != RIGHT_BORDER ? g_strdup_printf (" RIGHTMARGIN=\"%d\"", e->rightBorder) : g_strdup ("");
274 	tm = e->topBorder != TOP_BORDER ? g_strdup_printf (" TOPMARGIN=\"%d\"", e->topBorder) : g_strdup ("");
275 	bm = e->bottomBorder != BOTTOM_BORDER ? g_strdup_printf (" BOTTOMMARGIN=\"%d\"", e->bottomBorder) : g_strdup ("");
276 
277 	body = g_strconcat ("<BODY", text, link, bg, bg_image, lm, rm, tm, bm, ">\n", NULL);
278 
279 	g_free (lm);
280 	g_free (rm);
281 	g_free (tm);
282 	g_free (bm);
283 	g_free (text);
284 	g_free (link);
285 	g_free (bg);
286 	g_free (bg_image);
287 
288 	return body;
289 }
290 
291 static gboolean
write_header(HTMLEngineSaveState * state)292 write_header (HTMLEngineSaveState *state)
293 {
294 	gboolean retval = TRUE;
295 	gchar *body;
296 
297 	html_engine_clear_all_class_data (state->engine);
298 	/* Preface.  */
299 	if (!html_engine_save_output_string
300 		    (state,
301 		     "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 TRANSITIONAL//EN\">\n"
302 		     "<HTML>\n"))
303 		return FALSE;
304 
305 	/* Header start.  FIXME: `GENERATOR' string?  */
306 	if (!html_engine_save_output_string
307 		     (state,
308 		      "<HEAD>\n"
309 		      "  <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; CHARSET=UTF-8\">\n"
310 		      "  <META NAME=\"GENERATOR\" CONTENT=\"GtkHTML/%s\">\n", VERSION))
311 		return FALSE;
312 
313 	/* Title.  */
314 	if (state->engine->title != NULL
315 	    && state->engine->title->str != NULL
316 	    && state->engine->title->str[0] != '\0') {
317 		if (!html_engine_save_delims_and_vals (state,
318 				"  <TITLE>", state->engine->title->str,
319 				"</TITLE>\n", NULL))
320 			return FALSE;
321 	}
322 
323 	/* End of header.  */
324 	if (!html_engine_save_output_string (state, "</HEAD>\n"))
325 		return FALSE;
326 
327 	/* Start of body.  */
328 	body = get_body (state->engine);
329 	if (!html_engine_save_output_string (state, "%s", body))
330 		retval = FALSE;
331 	g_free (body);
332 
333 	return retval;
334 }
335 
336 static gboolean
write_end(HTMLEngineSaveState * state)337 write_end (HTMLEngineSaveState *state)
338 {
339 	if (!html_engine_save_output_string (state, "</BODY>\n</HTML>\n"))
340 		return FALSE;
341 
342 	html_engine_clear_all_class_data (state->engine);
343 
344 	return TRUE;
345 }
346 
347 gboolean
html_engine_save(HTMLEngine * engine,HTMLEngineSaveReceiverFn receiver,gpointer user_data)348 html_engine_save (HTMLEngine *engine,
349                   HTMLEngineSaveReceiverFn receiver,
350                   gpointer user_data)
351 {
352 	HTMLEngineSaveState state;
353 
354 	if (engine->clue == NULL) {
355 		/* Empty document.  */
356 		return FALSE;
357 	}
358 
359 	/* gtk_html_debug_dump_tree_simple (engine->clue, 1); */
360 
361 	state.engine = engine;
362 	state.receiver = receiver;
363 	state.br_count = 0;
364 	state.error = FALSE;
365 	state.inline_frames = FALSE;
366 	state.user_data = user_data;
367 	state.last_level = 0;
368 
369 	if (!write_header (&state))
370 		return FALSE;
371 
372 	html_object_save (engine->clue, &state);
373 	if (state.error)
374 		return FALSE;
375 
376 	if (!write_end (&state))
377 		return FALSE;
378 
379 	return TRUE;
380 }
381 
382 gboolean
html_engine_save_plain(HTMLEngine * engine,HTMLEngineSaveReceiverFn receiver,gpointer user_data)383 html_engine_save_plain (HTMLEngine *engine,
384                         HTMLEngineSaveReceiverFn receiver,
385                         gpointer user_data)
386 {
387 	HTMLEngineSaveState state;
388 
389 	if (engine->clue == NULL) {
390 		/* Empty document.  */
391 		return FALSE;
392 	}
393 
394 	/* gtk_html_debug_dump_tree_simple (engine->clue, 1); */
395 
396 	state.engine = engine;
397 	state.receiver = receiver;
398 	state.br_count = 0;
399 	state.error = FALSE;
400 	state.inline_frames = FALSE;
401 	state.user_data = user_data;
402 	state.last_level = 0;
403 
404 	/* FIXME don't hardcode the length */
405 	html_object_save_plain (engine->clue, &state, 72);
406 	if (state.error)
407 		return FALSE;
408 
409 	return TRUE;
410 }
411 
412 static gboolean
html_engine_save_buffer_receiver(const HTMLEngine * engine,const gchar * data,guint len,gpointer user_data)413 html_engine_save_buffer_receiver (const HTMLEngine *engine,
414                                   const gchar *data,
415                                   guint len,
416                                   gpointer user_data)
417 {
418 	g_string_append ((GString *) user_data, (gchar *) data);
419 
420 	return TRUE;
421 }
422 
423 gchar *
html_engine_save_buffer_free(HTMLEngineSaveState * state,gboolean free_string)424 html_engine_save_buffer_free (HTMLEngineSaveState *state,
425                               gboolean free_string)
426 {
427 	GString *string;
428 	gchar *rv = NULL;
429 
430 	g_return_val_if_fail (state != NULL, NULL);
431 	string = (GString *) state->user_data;
432 
433 	if (!free_string)
434 		rv = string->str;
435 	g_string_free (string, free_string);
436 
437 	g_free (state);
438 
439 	return rv;
440 }
441 
442 guchar *
html_engine_save_buffer_peek_text(HTMLEngineSaveState * state)443 html_engine_save_buffer_peek_text (HTMLEngineSaveState *state)
444 {
445 	GString *string;
446 
447 	g_return_val_if_fail (state != NULL, NULL);
448 	string = (GString *) state->user_data;
449 
450 	return (guchar *) string->str;
451 }
452 
453 gint
html_engine_save_buffer_peek_text_bytes(HTMLEngineSaveState * state)454 html_engine_save_buffer_peek_text_bytes (HTMLEngineSaveState *state)
455 {
456 	GString *string;
457 
458 	g_return_val_if_fail (state != NULL, 0);
459 	string = (GString *) state->user_data;
460 
461 	return string->len;
462 }
463 
464 HTMLEngineSaveState *
html_engine_save_buffer_new(HTMLEngine * engine,gboolean inline_frames)465 html_engine_save_buffer_new (HTMLEngine *engine,
466                              gboolean inline_frames)
467 {
468 	HTMLEngineSaveState *state = g_new0 (HTMLEngineSaveState, 1);
469 
470 	if (state) {
471 		state->engine = engine;
472 		state->receiver = (HTMLEngineSaveReceiverFn) html_engine_save_buffer_receiver;
473 		state->br_count = 0;
474 		state->error = FALSE;
475 		state->inline_frames = inline_frames;
476 		state->user_data = (gpointer) g_string_new ("");
477 		state->last_level = 0;
478 	}
479 
480 	return state;
481 }
482 
483 gchar *
html_engine_save_get_sample_body(HTMLEngine * e,HTMLObject * o)484 html_engine_save_get_sample_body (HTMLEngine *e,
485                                   HTMLObject *o)
486 {
487 	return get_body (e);
488 }
489 
490 const gchar *
html_engine_save_get_paragraph_style(GtkHTMLParagraphStyle style)491 html_engine_save_get_paragraph_style (GtkHTMLParagraphStyle style)
492 {
493 	switch (style) {
494 	case GTK_HTML_PARAGRAPH_STYLE_NORMAL:
495 		return NULL;
496 	case GTK_HTML_PARAGRAPH_STYLE_H1:
497 		return "h1";
498 	case GTK_HTML_PARAGRAPH_STYLE_H2:
499 		return "h2";
500 	case GTK_HTML_PARAGRAPH_STYLE_H3:
501 		return "h3";
502 	case GTK_HTML_PARAGRAPH_STYLE_H4:
503 		return "h4";
504 	case GTK_HTML_PARAGRAPH_STYLE_H5:
505 		return "h5";
506 	case GTK_HTML_PARAGRAPH_STYLE_H6:
507 		return "h6";
508 	case GTK_HTML_PARAGRAPH_STYLE_ADDRESS:
509 		return "address";
510 	case GTK_HTML_PARAGRAPH_STYLE_PRE:
511 		return "pre";
512 	case GTK_HTML_PARAGRAPH_STYLE_ITEMDOTTED:
513 	case GTK_HTML_PARAGRAPH_STYLE_ITEMROMAN:
514 	case GTK_HTML_PARAGRAPH_STYLE_ITEMDIGIT:
515 	case GTK_HTML_PARAGRAPH_STYLE_ITEMALPHA:
516 		return "li";
517 	}
518 
519 	g_warning ("Unknown GtkHTMLParagraphStyle %d", style);
520 
521 	return NULL;
522 }
523 
524 const gchar *
html_engine_save_get_paragraph_align(GtkHTMLParagraphAlignment align)525 html_engine_save_get_paragraph_align (GtkHTMLParagraphAlignment align)
526 {
527 	switch (align) {
528 	case GTK_HTML_PARAGRAPH_ALIGNMENT_RIGHT:
529 		return "right";
530 	case GTK_HTML_PARAGRAPH_ALIGNMENT_CENTER:
531 		return "center";
532 	case GTK_HTML_PARAGRAPH_ALIGNMENT_LEFT:
533 		return "left";
534 	}
535 
536 	g_warning ("Unknown GtkHTMLParagraphAlignment %d", align);
537 
538 	return NULL;
539 }
540 
541 gint
html_engine_save_string_append_nonbsp(GString * out,const guchar * s,guint length)542 html_engine_save_string_append_nonbsp (GString *out,
543                                        const guchar *s,
544                                        guint length)
545 {
546 	guint len = length;
547 
548 	while (len--) {
549 		if (IS_UTF8_NBSP (s)) {
550 			g_string_append_c (out, ' ');
551 			s += 2;
552 			len--;
553 		} else {
554 			g_string_append_c (out, *s);
555 			s++;
556 		}
557 	}
558 	return length;
559 }
560