1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* This file is part of the GtkHTML library.
3 *
4 * Copyright (C) 1998 World Wide Web Consortium
5 * Copyright (C) 2000 Helix Code, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 * Author: Ettore Perazzoli <ettore@helixcode.com>
23 * `encode_entities ()' adapted from gnome-xml by Daniel Veillard
24 * <Daniel.Veillard@w3.org>.
25 */
26
27 #include <string.h>
28
29 #include "config.h"
30 #include "htmlcluev.h"
31 #include "htmlcolor.h"
32 #include "htmlengine.h"
33 #include "htmlimage.h"
34 #include "htmlentity.h"
35 #include "htmlengine-save.h"
36 #include "htmlsettings.h"
37
38 #include "gtkhtmldebug.h"
39
/* &# = 2 characters
 * at most 10 characters from gunichar = guint32 (0 to 4294967295)
 * ; = 1 character
 * \0 = 1 character
 */
45 #define HTML_ENTITIES_MAX_LENGTH 14
46
47
/* This routine was originally written by Daniel Veillard, (C) 1998 World Wide
 * Web Consortium. */
50 gchar *
html_encode_entities(const gchar * input,guint len,guint * encoded_len_return)51 html_encode_entities (const gchar *input,
52 guint len,
53 guint *encoded_len_return)
54 {
55 gunichar uc;
56 const gchar *p;
57 guchar *buffer = NULL;
58 guchar *out = NULL;
59 gint buffer_size = 0;
60 guint count;
61
62 /* Allocate an translation buffer. */
63 buffer_size = 1000;
64 buffer = g_malloc (buffer_size);
65
66 out = buffer;
67 p = input;
68 count = 0;
69
70 while (p && *p && count < len) {
71 if (out - buffer > buffer_size - 100) {
72 gint index = out - buffer;
73
74 buffer_size *= 2;
75 buffer = g_realloc (buffer, buffer_size);
76 out = &buffer[index];
77 }
78 uc = g_utf8_get_char (p);
79
80 /* By default one have to encode at least '<', '>', '"' and '&'. */
81
82 if (uc == '<') {
83 *out++ = '&';
84 *out++ = 'l';
85 *out++ = 't';
86 *out++ = ';';
87 } else if (uc == '>') {
88 *out++ = '&';
89 *out++ = 'g';
90 *out++ = 't';
91 *out++ = ';';
92 } else if (uc == '&') {
93 *out++ = '&';
94 *out++ = 'a';
95 *out++ = 'm';
96 *out++ = 'p';
97 *out++ = ';';
98 } else if (uc == '"') {
99 *out++ = '&';
100 *out++ = 'q';
101 *out++ = 'u';
102 *out++ = 'o';
103 *out++ = 't';
104 *out++ = ';';
105 } else if (uc == ENTITY_NBSP) {
106 *out++ = '&';
107 *out++ = 'n';
108 *out++ = 'b';
109 *out++ = 's';
110 *out++ = 'p';
111 *out++ = ';';
112 } else if (((uc >= 0x20) && (uc < 0x80))
113 || (uc == '\n') || (uc == '\r') || (uc == '\t')) {
114 /* Default case, just copy. */
115 *out++ = uc;
116 } else {
117 gchar buf[HTML_ENTITIES_MAX_LENGTH], *ptr;
118
119 g_snprintf (buf, HTML_ENTITIES_MAX_LENGTH, "&#%d;", uc);
120
121 ptr = buf;
122 while (*ptr != 0)
123 *out++ = *ptr++;
124 }
125
126 count++;
127 p = g_utf8_next_char (p);
128 }
129
130 *out = 0;
131 if (encoded_len_return)
132 *encoded_len_return = out - buffer;
133
134 return (gchar *) buffer;
135 }
136
137 gboolean
html_engine_save_encode(HTMLEngineSaveState * state,const gchar * buffer,guint length)138 html_engine_save_encode (HTMLEngineSaveState *state,
139 const gchar *buffer,
140 guint length)
141 {
142 gchar *encoded_buffer;
143 guint encoded_length;
144 gboolean success;
145
146 g_return_val_if_fail (state != NULL, FALSE);
147 g_return_val_if_fail (buffer != NULL, FALSE);
148
149 if (length == 0)
150 return TRUE;
151
152 encoded_buffer = html_encode_entities (buffer, length, &encoded_length);
153 success = state->receiver (state->engine, encoded_buffer, encoded_length, state->user_data);
154
155 g_free (encoded_buffer);
156 return success;
157 }
158
159 gboolean
html_engine_save_encode_string(HTMLEngineSaveState * state,const gchar * s)160 html_engine_save_encode_string (HTMLEngineSaveState *state,
161 const gchar *s)
162 {
163 guint len;
164
165 g_return_val_if_fail (state != NULL, FALSE);
166 g_return_val_if_fail (s != NULL, FALSE);
167
168 len = strlen (s);
169
170 return html_engine_save_encode (state, s, len);
171 }
172
173 gboolean
html_engine_save_output_stringv(HTMLEngineSaveState * state,const gchar * format,va_list ap)174 html_engine_save_output_stringv (HTMLEngineSaveState *state,
175 const gchar *format,
176 va_list ap)
177 {
178 gchar *string;
179 gboolean retval;
180
181 string = g_strdup_vprintf (format, ap);
182 retval = state->receiver (state->engine, string, strlen (string), state->user_data);
183 g_free (string);
184
185 return retval;
186 }
187
188 gboolean
html_engine_save_output_string(HTMLEngineSaveState * state,const gchar * format,...)189 html_engine_save_output_string (HTMLEngineSaveState *state,
190 const gchar *format,
191 ...)
192 {
193 va_list args;
194 gboolean retval;
195
196 g_return_val_if_fail (format != NULL, FALSE);
197 g_return_val_if_fail (state != NULL, FALSE);
198
199 va_start (args, format);
200 retval = html_engine_save_output_stringv (state, format, args);
201 va_end (args);
202
203 return retval;
204 }
205
206 gboolean
html_engine_save_output_buffer(HTMLEngineSaveState * state,const gchar * buffer,gint bytes)207 html_engine_save_output_buffer (HTMLEngineSaveState *state,
208 const gchar *buffer,
209 gint bytes)
210 {
211 if (bytes == -1)
212 bytes = strlen (buffer);
213 return state->receiver (state->engine, buffer, bytes, state->user_data);
214 }
215
216 gboolean
html_engine_save_delims_and_vals(HTMLEngineSaveState * state,const gchar * first,...)217 html_engine_save_delims_and_vals (HTMLEngineSaveState *state,
218 const gchar *first,
219 ...)
220 {
221 va_list args;
222 gboolean retval;
223 const gchar *value, *after;
224
225 g_return_val_if_fail (state != NULL, FALSE);
226
227 retval = html_engine_save_output_buffer (state, first, -1);
228 va_start (args, first);
229 while (retval && (value = va_arg (args, const gchar *)) != NULL) {
230 after = va_arg (args, const gchar *);
231 retval = html_engine_save_encode_string (state, value)
232 && html_engine_save_output_buffer (state, after, -1);
233 }
234 va_end (args);
235
236 return retval;
237 }
238
239
240 static gchar *
color_to_string(const gchar * s,HTMLColor * c)241 color_to_string (const gchar *s,
242 HTMLColor *c)
243 {
244 gchar color[20];
245
246 g_snprintf (color, 20, " %s=\"#%02x%02x%02x\"", s, c->color.red >> 8, c->color.green >> 8, c->color.blue >> 8);
247 return g_strdup (color);
248 }
249
250 static gchar *
get_body(HTMLEngine * e)251 get_body (HTMLEngine *e)
252 {
253 HTMLColorSet *cset;
254 gchar *body;
255 gchar *text;
256 gchar *bg;
257 gchar *bg_image;
258 gchar *link;
259 gchar *lm, *rm, *tm, *bm;
260 gchar *url = NULL;
261
262 cset = e->settings->color_set;
263 text = (cset->changed[HTMLTextColor]) ? color_to_string ("TEXT", cset->color[HTMLTextColor]) : g_strdup ("");
264 link = (cset->changed[HTMLLinkColor]) ? color_to_string ("LINK", cset->color[HTMLLinkColor]) : g_strdup ("");
265 bg = (cset->changed[HTMLBgColor]) ? color_to_string ("BGCOLOR", cset->color[HTMLBgColor]) : g_strdup ("");
266 bg_image = e->bgPixmapPtr ? g_strdup_printf (" BACKGROUND=\"%s\"",
267 url = html_image_resolve_image_url
268 (e->widget, ((HTMLImagePointer *) e->bgPixmapPtr)->url))
269 : g_strdup ("");
270 g_free (url);
271
272 lm = e->leftBorder != LEFT_BORDER ? g_strdup_printf (" LEFTMARGIN=\"%d\"", e->leftBorder) : g_strdup ("");
273 rm = e->rightBorder != RIGHT_BORDER ? g_strdup_printf (" RIGHTMARGIN=\"%d\"", e->rightBorder) : g_strdup ("");
274 tm = e->topBorder != TOP_BORDER ? g_strdup_printf (" TOPMARGIN=\"%d\"", e->topBorder) : g_strdup ("");
275 bm = e->bottomBorder != BOTTOM_BORDER ? g_strdup_printf (" BOTTOMMARGIN=\"%d\"", e->bottomBorder) : g_strdup ("");
276
277 body = g_strconcat ("<BODY", text, link, bg, bg_image, lm, rm, tm, bm, ">\n", NULL);
278
279 g_free (lm);
280 g_free (rm);
281 g_free (tm);
282 g_free (bm);
283 g_free (text);
284 g_free (link);
285 g_free (bg);
286 g_free (bg_image);
287
288 return body;
289 }
290
291 static gboolean
write_header(HTMLEngineSaveState * state)292 write_header (HTMLEngineSaveState *state)
293 {
294 gboolean retval = TRUE;
295 gchar *body;
296
297 html_engine_clear_all_class_data (state->engine);
298 /* Preface. */
299 if (!html_engine_save_output_string
300 (state,
301 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 TRANSITIONAL//EN\">\n"
302 "<HTML>\n"))
303 return FALSE;
304
305 /* Header start. FIXME: `GENERATOR' string? */
306 if (!html_engine_save_output_string
307 (state,
308 "<HEAD>\n"
309 " <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; CHARSET=UTF-8\">\n"
310 " <META NAME=\"GENERATOR\" CONTENT=\"GtkHTML/%s\">\n", VERSION))
311 return FALSE;
312
313 /* Title. */
314 if (state->engine->title != NULL
315 && state->engine->title->str != NULL
316 && state->engine->title->str[0] != '\0') {
317 if (!html_engine_save_delims_and_vals (state,
318 " <TITLE>", state->engine->title->str,
319 "</TITLE>\n", NULL))
320 return FALSE;
321 }
322
323 /* End of header. */
324 if (!html_engine_save_output_string (state, "</HEAD>\n"))
325 return FALSE;
326
327 /* Start of body. */
328 body = get_body (state->engine);
329 if (!html_engine_save_output_string (state, "%s", body))
330 retval = FALSE;
331 g_free (body);
332
333 return retval;
334 }
335
336 static gboolean
write_end(HTMLEngineSaveState * state)337 write_end (HTMLEngineSaveState *state)
338 {
339 if (!html_engine_save_output_string (state, "</BODY>\n</HTML>\n"))
340 return FALSE;
341
342 html_engine_clear_all_class_data (state->engine);
343
344 return TRUE;
345 }
346
347 gboolean
html_engine_save(HTMLEngine * engine,HTMLEngineSaveReceiverFn receiver,gpointer user_data)348 html_engine_save (HTMLEngine *engine,
349 HTMLEngineSaveReceiverFn receiver,
350 gpointer user_data)
351 {
352 HTMLEngineSaveState state;
353
354 if (engine->clue == NULL) {
355 /* Empty document. */
356 return FALSE;
357 }
358
359 /* gtk_html_debug_dump_tree_simple (engine->clue, 1); */
360
361 state.engine = engine;
362 state.receiver = receiver;
363 state.br_count = 0;
364 state.error = FALSE;
365 state.inline_frames = FALSE;
366 state.user_data = user_data;
367 state.last_level = 0;
368
369 if (!write_header (&state))
370 return FALSE;
371
372 html_object_save (engine->clue, &state);
373 if (state.error)
374 return FALSE;
375
376 if (!write_end (&state))
377 return FALSE;
378
379 return TRUE;
380 }
381
382 gboolean
html_engine_save_plain(HTMLEngine * engine,HTMLEngineSaveReceiverFn receiver,gpointer user_data)383 html_engine_save_plain (HTMLEngine *engine,
384 HTMLEngineSaveReceiverFn receiver,
385 gpointer user_data)
386 {
387 HTMLEngineSaveState state;
388
389 if (engine->clue == NULL) {
390 /* Empty document. */
391 return FALSE;
392 }
393
394 /* gtk_html_debug_dump_tree_simple (engine->clue, 1); */
395
396 state.engine = engine;
397 state.receiver = receiver;
398 state.br_count = 0;
399 state.error = FALSE;
400 state.inline_frames = FALSE;
401 state.user_data = user_data;
402 state.last_level = 0;
403
404 /* FIXME don't hardcode the length */
405 html_object_save_plain (engine->clue, &state, 72);
406 if (state.error)
407 return FALSE;
408
409 return TRUE;
410 }
411
412 static gboolean
html_engine_save_buffer_receiver(const HTMLEngine * engine,const gchar * data,guint len,gpointer user_data)413 html_engine_save_buffer_receiver (const HTMLEngine *engine,
414 const gchar *data,
415 guint len,
416 gpointer user_data)
417 {
418 g_string_append ((GString *) user_data, (gchar *) data);
419
420 return TRUE;
421 }
422
423 gchar *
html_engine_save_buffer_free(HTMLEngineSaveState * state,gboolean free_string)424 html_engine_save_buffer_free (HTMLEngineSaveState *state,
425 gboolean free_string)
426 {
427 GString *string;
428 gchar *rv = NULL;
429
430 g_return_val_if_fail (state != NULL, NULL);
431 string = (GString *) state->user_data;
432
433 if (!free_string)
434 rv = string->str;
435 g_string_free (string, free_string);
436
437 g_free (state);
438
439 return rv;
440 }
441
442 guchar *
html_engine_save_buffer_peek_text(HTMLEngineSaveState * state)443 html_engine_save_buffer_peek_text (HTMLEngineSaveState *state)
444 {
445 GString *string;
446
447 g_return_val_if_fail (state != NULL, NULL);
448 string = (GString *) state->user_data;
449
450 return (guchar *) string->str;
451 }
452
453 gint
html_engine_save_buffer_peek_text_bytes(HTMLEngineSaveState * state)454 html_engine_save_buffer_peek_text_bytes (HTMLEngineSaveState *state)
455 {
456 GString *string;
457
458 g_return_val_if_fail (state != NULL, 0);
459 string = (GString *) state->user_data;
460
461 return string->len;
462 }
463
464 HTMLEngineSaveState *
html_engine_save_buffer_new(HTMLEngine * engine,gboolean inline_frames)465 html_engine_save_buffer_new (HTMLEngine *engine,
466 gboolean inline_frames)
467 {
468 HTMLEngineSaveState *state = g_new0 (HTMLEngineSaveState, 1);
469
470 if (state) {
471 state->engine = engine;
472 state->receiver = (HTMLEngineSaveReceiverFn) html_engine_save_buffer_receiver;
473 state->br_count = 0;
474 state->error = FALSE;
475 state->inline_frames = inline_frames;
476 state->user_data = (gpointer) g_string_new ("");
477 state->last_level = 0;
478 }
479
480 return state;
481 }
482
483 gchar *
html_engine_save_get_sample_body(HTMLEngine * e,HTMLObject * o)484 html_engine_save_get_sample_body (HTMLEngine *e,
485 HTMLObject *o)
486 {
487 return get_body (e);
488 }
489
490 const gchar *
html_engine_save_get_paragraph_style(GtkHTMLParagraphStyle style)491 html_engine_save_get_paragraph_style (GtkHTMLParagraphStyle style)
492 {
493 switch (style) {
494 case GTK_HTML_PARAGRAPH_STYLE_NORMAL:
495 return NULL;
496 case GTK_HTML_PARAGRAPH_STYLE_H1:
497 return "h1";
498 case GTK_HTML_PARAGRAPH_STYLE_H2:
499 return "h2";
500 case GTK_HTML_PARAGRAPH_STYLE_H3:
501 return "h3";
502 case GTK_HTML_PARAGRAPH_STYLE_H4:
503 return "h4";
504 case GTK_HTML_PARAGRAPH_STYLE_H5:
505 return "h5";
506 case GTK_HTML_PARAGRAPH_STYLE_H6:
507 return "h6";
508 case GTK_HTML_PARAGRAPH_STYLE_ADDRESS:
509 return "address";
510 case GTK_HTML_PARAGRAPH_STYLE_PRE:
511 return "pre";
512 case GTK_HTML_PARAGRAPH_STYLE_ITEMDOTTED:
513 case GTK_HTML_PARAGRAPH_STYLE_ITEMROMAN:
514 case GTK_HTML_PARAGRAPH_STYLE_ITEMDIGIT:
515 case GTK_HTML_PARAGRAPH_STYLE_ITEMALPHA:
516 return "li";
517 }
518
519 g_warning ("Unknown GtkHTMLParagraphStyle %d", style);
520
521 return NULL;
522 }
523
524 const gchar *
html_engine_save_get_paragraph_align(GtkHTMLParagraphAlignment align)525 html_engine_save_get_paragraph_align (GtkHTMLParagraphAlignment align)
526 {
527 switch (align) {
528 case GTK_HTML_PARAGRAPH_ALIGNMENT_RIGHT:
529 return "right";
530 case GTK_HTML_PARAGRAPH_ALIGNMENT_CENTER:
531 return "center";
532 case GTK_HTML_PARAGRAPH_ALIGNMENT_LEFT:
533 return "left";
534 }
535
536 g_warning ("Unknown GtkHTMLParagraphAlignment %d", align);
537
538 return NULL;
539 }
540
541 gint
html_engine_save_string_append_nonbsp(GString * out,const guchar * s,guint length)542 html_engine_save_string_append_nonbsp (GString *out,
543 const guchar *s,
544 guint length)
545 {
546 guint len = length;
547
548 while (len--) {
549 if (IS_UTF8_NBSP (s)) {
550 g_string_append_c (out, ' ');
551 s += 2;
552 len--;
553 } else {
554 g_string_append_c (out, *s);
555 s++;
556 }
557 }
558 return length;
559 }
560