1 /*
2  * Copyright 2007 James Bursa <bursa@users.sourceforge.net>
3  * Copyright 2010 Michael Drake <tlsa@netsurf-browser.org>
4  *
5  * This file is part of NetSurf, http://www.netsurf-browser.org/
6  *
7  * NetSurf is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; version 2 of the License.
10  *
11  * NetSurf is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 /**
21  * \file
22  * Implementation of HTML content handling.
23  */
24 
25 #include <assert.h>
26 #include <stdint.h>
27 #include <string.h>
28 #include <strings.h>
29 #include <stdlib.h>
30 #include <nsutils/time.h>
31 
32 #include "utils/utils.h"
33 #include "utils/config.h"
34 #include "utils/corestrings.h"
35 #include "utils/http.h"
36 #include "utils/libdom.h"
37 #include "utils/log.h"
38 #include "utils/messages.h"
39 #include "utils/talloc.h"
40 #include "utils/utf8.h"
41 #include "utils/nsoption.h"
42 #include "utils/string.h"
43 #include "utils/ascii.h"
44 #include "netsurf/content.h"
45 #include "netsurf/browser_window.h"
46 #include "netsurf/utf8.h"
47 #include "netsurf/keypress.h"
48 #include "netsurf/layout.h"
49 #include "netsurf/misc.h"
50 #include "content/hlcache.h"
51 #include "content/content_factory.h"
52 #include "content/textsearch.h"
53 #include "desktop/selection.h"
54 #include "desktop/scrollbar.h"
55 #include "desktop/textarea.h"
56 #include "netsurf/bitmap.h"
57 #include "javascript/js.h"
58 #include "desktop/gui_internal.h"
59 
60 #include "html/html.h"
61 #include "html/private.h"
62 #include "html/dom_event.h"
63 #include "html/css.h"
64 #include "html/object.h"
65 #include "html/html_save.h"
66 #include "html/interaction.h"
67 #include "html/box.h"
68 #include "html/box_construct.h"
69 #include "html/box_inspect.h"
70 #include "html/form_internal.h"
71 #include "html/imagemap.h"
72 #include "html/layout.h"
73 #include "html/textselection.h"
74 
75 #define CHUNK 4096
76 
77 /* Change these to 1 to cause a dump to stderr of the frameset or box
78  * when the trees have been built.
79  */
80 #define ALWAYS_DUMP_FRAMESET 0
81 #define ALWAYS_DUMP_BOX 0
82 
/* MIME types covered by this file: XHTML and HTML documents.
 * NOTE(review): the consumer of this table is outside this view --
 * presumably the content handler registration; confirm.
 */
static const char *html_types[] = {
	"application/xhtml+xml",
	"text/html"
};
87 
88 /**
89  * Fire an event at the DOM
90  *
91  * Helper that swallows DOM errors.
92  *
93  * \param[in] event   the event to fire at the DOM
94  * \param[in] target  the event target
95  * \return true on success
96  */
fire_dom_event(dom_event * event,dom_node * target)97 static bool fire_dom_event(dom_event *event, dom_node *target)
98 {
99 	dom_exception exc;
100 	bool result;
101 
102 	exc = dom_event_target_dispatch_event(target, event, &result);
103 	if (exc != DOM_NO_ERR) {
104 		return false;
105 	}
106 
107 	return result;
108 }
109 
110 /* Exported interface, see html_internal.h */
fire_generic_dom_event(dom_string * type,dom_node * target,bool bubbles,bool cancelable)111 bool fire_generic_dom_event(dom_string *type, dom_node *target,
112 		bool bubbles, bool cancelable)
113 {
114 	dom_exception exc;
115 	dom_event *evt;
116 	bool result;
117 
118 	exc = dom_event_create(&evt);
119 	if (exc != DOM_NO_ERR) return false;
120 	exc = dom_event_init(evt, type, bubbles, cancelable);
121 	if (exc != DOM_NO_ERR) {
122 		dom_event_unref(evt);
123 		return false;
124 	}
125 	NSLOG(netsurf, INFO, "Dispatching '%*s' against %p",
126 	      dom_string_length(type), dom_string_data(type), target);
127 	result = fire_dom_event(evt, target);
128 	dom_event_unref(evt);
129 	return result;
130 }
131 
132 /* Exported interface, see html_internal.h */
fire_dom_keyboard_event(dom_string * type,dom_node * target,bool bubbles,bool cancelable,uint32_t key)133 bool fire_dom_keyboard_event(dom_string *type, dom_node *target,
134 		bool bubbles, bool cancelable, uint32_t key)
135 {
136 	bool is_special = key <= 0x001F || (0x007F <= key && key <= 0x009F);
137 	dom_string *dom_key = NULL;
138 	dom_keyboard_event *evt;
139 	dom_exception exc;
140 	bool result;
141 
142 	if (is_special) {
143 		switch (key) {
144 		case NS_KEY_ESCAPE:
145 			dom_key = dom_string_ref(corestring_dom_Escape);
146 			break;
147 		case NS_KEY_LEFT:
148 			dom_key = dom_string_ref(corestring_dom_ArrowLeft);
149 			break;
150 		case NS_KEY_RIGHT:
151 			dom_key = dom_string_ref(corestring_dom_ArrowRight);
152 			break;
153 		case NS_KEY_UP:
154 			dom_key = dom_string_ref(corestring_dom_ArrowUp);
155 			break;
156 		case NS_KEY_DOWN:
157 			dom_key = dom_string_ref(corestring_dom_ArrowDown);
158 			break;
159 		case NS_KEY_PAGE_UP:
160 			dom_key = dom_string_ref(corestring_dom_PageUp);
161 			break;
162 		case NS_KEY_PAGE_DOWN:
163 			dom_key = dom_string_ref(corestring_dom_PageDown);
164 			break;
165 		case NS_KEY_TEXT_START:
166 			dom_key = dom_string_ref(corestring_dom_Home);
167 			break;
168 		case NS_KEY_TEXT_END:
169 			dom_key = dom_string_ref(corestring_dom_End);
170 			break;
171 		default:
172 			dom_key = NULL;
173 			break;
174 		}
175 	} else {
176 		char utf8[6];
177 		size_t length = utf8_from_ucs4(key, utf8);
178 		utf8[length] = '\0';
179 
180 		exc = dom_string_create((const uint8_t *)utf8, strlen(utf8),
181 				&dom_key);
182 		if (exc != DOM_NO_ERR) {
183 			return exc;
184 		}
185 	}
186 
187 	exc = dom_keyboard_event_create(&evt);
188 	if (exc != DOM_NO_ERR) {
189 		dom_string_unref(dom_key);
190 		return false;
191 	}
192 
193 	exc = dom_keyboard_event_init(evt, type, bubbles, cancelable, NULL,
194 			dom_key, NULL, DOM_KEY_LOCATION_STANDARD, false,
195 			false, false, false, false, false);
196 	dom_string_unref(dom_key);
197 	if (exc != DOM_NO_ERR) {
198 		dom_event_unref(evt);
199 		return false;
200 	}
201 
202 	NSLOG(netsurf, INFO, "Dispatching '%*s' against %p",
203 			dom_string_length(type), dom_string_data(type), target);
204 
205 	result = fire_dom_event((dom_event *) evt, target);
206 	dom_event_unref(evt);
207 	return result;
208 }
209 
210 /**
211  * Perform post-box-creation conversion of a document
212  *
213  * \param c        HTML content to complete conversion of
214  * \param success  Whether box tree construction was successful
215  */
html_box_convert_done(html_content * c,bool success)216 static void html_box_convert_done(html_content *c, bool success)
217 {
218 	nserror err;
219 	dom_exception exc; /* returned by libdom functions */
220 	dom_node *html;
221 
222 	NSLOG(netsurf, INFO, "DOM to box conversion complete (content %p)", c);
223 
224 	c->box_conversion_context = NULL;
225 
226 	/* Clean up and report error if unsuccessful or aborted */
227 	if ((success == false) || (c->aborted)) {
228 		html_object_free_objects(c);
229 
230 		if (success == false) {
231 			content_broadcast_error(&c->base, NSERROR_BOX_CONVERT, NULL);
232 		} else {
233 			content_broadcast_error(&c->base, NSERROR_STOPPED, NULL);
234 		}
235 
236 		content_set_error(&c->base);
237 		return;
238 	}
239 
240 
241 #if ALWAYS_DUMP_BOX
242 	box_dump(stderr, c->layout->children, 0, true);
243 #endif
244 #if ALWAYS_DUMP_FRAMESET
245 	if (c->frameset)
246 		html_dump_frameset(c->frameset, 0);
247 #endif
248 
249 	exc = dom_document_get_document_element(c->document, (void *) &html);
250 	if ((exc != DOM_NO_ERR) || (html == NULL)) {
251 		/** @todo should this call html_object_free_objects(c);
252 		 * like the other error paths
253 		 */
254 		NSLOG(netsurf, INFO, "error retrieving html element from dom");
255 		content_broadcast_error(&c->base, NSERROR_DOM, NULL);
256 		content_set_error(&c->base);
257 		return;
258 	}
259 
260 	/* extract image maps - can't do this sensibly in dom_to_box */
261 	err = imagemap_extract(c);
262 	if (err != NSERROR_OK) {
263 		NSLOG(netsurf, INFO, "imagemap extraction failed");
264 		html_object_free_objects(c);
265 		content_broadcast_error(&c->base, err, NULL);
266 		content_set_error(&c->base);
267 		dom_node_unref(html);
268 		return;
269 	}
270 	/*imagemap_dump(c);*/
271 
272 	/* Destroy the parser binding */
273 	dom_hubbub_parser_destroy(c->parser);
274 	c->parser = NULL;
275 
276 	content_set_ready(&c->base);
277 
278 	html_proceed_to_done(c);
279 
280 	dom_node_unref(html);
281 }
282 
283 /* Documented in html_internal.h */
284 nserror
html_proceed_to_done(html_content * html)285 html_proceed_to_done(html_content *html)
286 {
287 	switch (content__get_status(&html->base)) {
288 	case CONTENT_STATUS_READY:
289 		if (html->base.active == 0) {
290 			content_set_done(&html->base);
291 			return NSERROR_OK;
292 		}
293 		break;
294 	case CONTENT_STATUS_DONE:
295 		/* fallthrough */
296 	case CONTENT_STATUS_LOADING:
297 		return NSERROR_OK;
298 	default:
299 		NSLOG(netsurf, ERROR, "Content status unexpectedly not LOADING/READY/DONE");
300 		break;
301 	}
302 	return NSERROR_UNKNOWN;
303 }
304 
305 
/**
 * Refresh the media description of an HTML content
 *
 * Broadcasts CONTENT_MSG_GETDIMS so that a recipient can fill in the
 * viewport size, then stores that size (converted from physical to CSS
 * pixels) together with the client font size and line height in the
 * content's media description.
 *
 * \param htmlc  HTML content to update
 */
static void html_get_dimensions(html_content *htmlc)
{
	/* Start from zero: nothing visible here guarantees that a
	 * recipient of the broadcast writes the dimensions back, and
	 * they are read unconditionally below.
	 */
	unsigned w = 0;
	unsigned h = 0;
	union content_msg_data msg_data = {
		.getdims = {
			.viewport_width = &w,
			.viewport_height = &h,
		},
	};

	content_broadcast(&htmlc->base, CONTENT_MSG_GETDIMS, &msg_data);

	htmlc->media.width  = nscss_pixels_physical_to_css(INTTOFIX(w));
	htmlc->media.height = nscss_pixels_physical_to_css(INTTOFIX(h));

	/* NOTE(review): the division by ten suggests the font_size option
	 * is held in tenths of a point -- confirm against the option
	 * definition.
	 */
	htmlc->media.client_font_size =
			FDIV(INTTOFIX(nsoption_int(font_size)), F_10);

	/* Line height is 1.33 times the font size expressed in pixels */
	htmlc->media.client_line_height =
			FMUL(nscss_len2px(NULL, htmlc->media.client_font_size,
					CSS_UNIT_PT, NULL), FLTTOFIX(1.33));
}
327 
328 /* exported function documented in html/html_internal.h */
html_finish_conversion(html_content * htmlc)329 void html_finish_conversion(html_content *htmlc)
330 {
331 	union content_msg_data msg_data;
332 	dom_exception exc; /* returned by libdom functions */
333 	dom_node *html;
334 	nserror error;
335 
336 	/* Bail out if we've been aborted */
337 	if (htmlc->aborted) {
338 		content_broadcast_error(&htmlc->base, NSERROR_STOPPED, NULL);
339 		content_set_error(&htmlc->base);
340 		return;
341 	}
342 
343 	/* If we already have a selection context, then we have already
344 	 * "finished" conversion.  We can get here twice if e.g. some JS
345 	 * adds a new stylesheet, and the stylesheet gets added after
346 	 * the HTML content is initially finished.
347 	 *
348 	 * If we didn't do this, the HTML content would try to rebuild the
349 	 * box tree for the html content when this new stylesheet is ready.
350 	 * NetSurf has no concept of dynamically changing documents, so this
351 	 * would break badly.
352 	 */
353 	if (htmlc->select_ctx != NULL) {
354 		NSLOG(netsurf, INFO,
355 				"Ignoring style change: NS layout is static.");
356 		return;
357 	}
358 
359 	/* create new css selection context */
360 	error = html_css_new_selection_context(htmlc, &htmlc->select_ctx);
361 	if (error != NSERROR_OK) {
362 		content_broadcast_error(&htmlc->base, error, NULL);
363 		content_set_error(&htmlc->base);
364 		return;
365 	}
366 
367 
368 	/* fire a simple event named load at the Document's Window
369 	 * object, but with its target set to the Document object (and
370 	 * the currentTarget set to the Window object)
371 	 */
372 	if (htmlc->jsthread != NULL) {
373 		js_fire_event(htmlc->jsthread, "load", htmlc->document, NULL);
374 	}
375 
376 	/* convert dom tree to box tree */
377 	NSLOG(netsurf, INFO, "DOM to box (%p)", htmlc);
378 	content_set_status(&htmlc->base, messages_get("Processing"));
379 	msg_data.explicit_status_text = NULL;
380 	content_broadcast(&htmlc->base, CONTENT_MSG_STATUS, &msg_data);
381 
382 	exc = dom_document_get_document_element(htmlc->document, (void *) &html);
383 	if ((exc != DOM_NO_ERR) || (html == NULL)) {
384 		NSLOG(netsurf, INFO, "error retrieving html element from dom");
385 		content_broadcast_error(&htmlc->base, NSERROR_DOM, NULL);
386 		content_set_error(&htmlc->base);
387 		return;
388 	}
389 
390 	html_get_dimensions(htmlc);
391 
392 	error = dom_to_box(html, htmlc, html_box_convert_done, &htmlc->box_conversion_context);
393 	if (error != NSERROR_OK) {
394 		NSLOG(netsurf, INFO, "box conversion failed");
395 		dom_node_unref(html);
396 		html_object_free_objects(htmlc);
397 		content_broadcast_error(&htmlc->base, error, NULL);
398 		content_set_error(&htmlc->base);
399 		return;
400 	}
401 
402 	dom_node_unref(html);
403 }
404 
405 
406 static void
html_document_user_data_handler(dom_node_operation operation,dom_string * key,void * data,struct dom_node * src,struct dom_node * dst)407 html_document_user_data_handler(dom_node_operation operation,
408 				dom_string *key, void *data,
409 				struct dom_node *src,
410 				struct dom_node *dst)
411 {
412 	if (dom_string_isequal(corestring_dom___ns_key_html_content_data,
413 			       key) == false || data == NULL) {
414 		return;
415 	}
416 
417 	switch (operation) {
418 	case DOM_NODE_CLONED:
419 		NSLOG(netsurf, INFO, "Cloned");
420 		break;
421 	case DOM_NODE_RENAMED:
422 		NSLOG(netsurf, INFO, "Renamed");
423 		break;
424 	case DOM_NODE_IMPORTED:
425 		NSLOG(netsurf, INFO, "imported");
426 		break;
427 	case DOM_NODE_ADOPTED:
428 		NSLOG(netsurf, INFO, "Adopted");
429 		break;
430 	case DOM_NODE_DELETED:
431 		/* This is the only path I expect */
432 		break;
433 	default:
434 		NSLOG(netsurf, INFO, "User data operation not handled.");
435 		assert(0);
436 	}
437 }
438 
439 
440 static nserror
html_create_html_data(html_content * c,const http_parameter * params)441 html_create_html_data(html_content *c, const http_parameter *params)
442 {
443 	lwc_string *charset;
444 	nserror nerror;
445 	dom_hubbub_parser_params parse_params;
446 	dom_hubbub_error error;
447 	dom_exception err;
448 	void *old_node_data;
449 
450 	c->parser = NULL;
451 	c->parse_completed = false;
452 	c->conversion_begun = false;
453 	c->document = NULL;
454 	c->quirks = DOM_DOCUMENT_QUIRKS_MODE_NONE;
455 	c->encoding = NULL;
456 	c->base_url = nsurl_ref(content_get_url(&c->base));
457 	c->base_target = NULL;
458 	c->aborted = false;
459 	c->refresh = false;
460 	c->reflowing = false;
461 	c->title = NULL;
462 	c->bctx = NULL;
463 	c->layout = NULL;
464 	c->background_colour = NS_TRANSPARENT;
465 	c->stylesheet_count = 0;
466 	c->stylesheets = NULL;
467 	c->select_ctx = NULL;
468 	c->media.type = CSS_MEDIA_SCREEN;
469 	c->universal = NULL;
470 	c->num_objects = 0;
471 	c->object_list = NULL;
472 	c->forms = NULL;
473 	c->imagemaps = NULL;
474 	c->bw = NULL;
475 	c->frameset = NULL;
476 	c->iframe = NULL;
477 	c->page = NULL;
478 	c->font_func = guit->layout;
479 	c->drag_type = HTML_DRAG_NONE;
480 	c->drag_owner.no_owner = true;
481 	c->selection_type = HTML_SELECTION_NONE;
482 	c->selection_owner.none = true;
483 	c->focus_type = HTML_FOCUS_SELF;
484 	c->focus_owner.self = true;
485 	c->scripts_count = 0;
486 	c->scripts = NULL;
487 	c->jsthread = NULL;
488 
489 	c->enable_scripting = nsoption_bool(enable_javascript);
490 	c->base.active = 1; /* The html content itself is active */
491 
492 	if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) {
493 		return NSERROR_NOMEM;
494 	}
495 
496 	c->sel = selection_create((struct content *)c);
497 
498 	nerror = http_parameter_list_find_item(params, corestring_lwc_charset, &charset);
499 	if (nerror == NSERROR_OK) {
500 		c->encoding = strdup(lwc_string_data(charset));
501 
502 		lwc_string_unref(charset);
503 
504 		if (c->encoding == NULL) {
505 			lwc_string_unref(c->universal);
506 			c->universal = NULL;
507 			return NSERROR_NOMEM;
508 
509 		}
510 		c->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER;
511 	}
512 
513 	/* Create the parser binding */
514 	parse_params.enc = c->encoding;
515 	parse_params.fix_enc = true;
516 	parse_params.enable_script = c->enable_scripting;
517 	parse_params.msg = NULL;
518 	parse_params.script = html_process_script;
519 	parse_params.ctx = c;
520 	parse_params.daf = html_dom_event_fetcher;
521 
522 	error = dom_hubbub_parser_create(&parse_params,
523 					 &c->parser,
524 					 &c->document);
525 	if ((error != DOM_HUBBUB_OK) && (c->encoding != NULL)) {
526 		/* Ok, we don't support the declared encoding. Bailing out
527 		 * isn't exactly user-friendly, so fall back to autodetect */
528 		free(c->encoding);
529 		c->encoding = NULL;
530 
531 		parse_params.enc = c->encoding;
532 
533 		error = dom_hubbub_parser_create(&parse_params,
534 						 &c->parser,
535 						 &c->document);
536 	}
537 	if (error != DOM_HUBBUB_OK) {
538 		nsurl_unref(c->base_url);
539 		c->base_url = NULL;
540 
541 		lwc_string_unref(c->universal);
542 		c->universal = NULL;
543 
544 		return libdom_hubbub_error_to_nserror(error);
545 	}
546 
547 	err = dom_node_set_user_data(c->document,
548 				     corestring_dom___ns_key_html_content_data,
549 				     c, html_document_user_data_handler,
550 				     (void *) &old_node_data);
551 	if (err != DOM_NO_ERR) {
552 		dom_hubbub_parser_destroy(c->parser);
553 		c->parser = NULL;
554 		nsurl_unref(c->base_url);
555 		c->base_url = NULL;
556 
557 		lwc_string_unref(c->universal);
558 		c->universal = NULL;
559 
560 		NSLOG(netsurf, INFO, "Unable to set user data.");
561 		return NSERROR_DOM;
562 	}
563 
564 	assert(old_node_data == NULL);
565 
566 	return NSERROR_OK;
567 
568 }
569 
570 /**
571  * Create a CONTENT_HTML.
572  *
573  * The content_html_data structure is initialized and the HTML parser is
574  * created.
575  */
576 
577 static nserror
html_create(const content_handler * handler,lwc_string * imime_type,const http_parameter * params,llcache_handle * llcache,const char * fallback_charset,bool quirks,struct content ** c)578 html_create(const content_handler *handler,
579 	    lwc_string *imime_type,
580 	    const http_parameter *params,
581 	    llcache_handle *llcache,
582 	    const char *fallback_charset,
583 	    bool quirks,
584 	    struct content **c)
585 {
586 	html_content *html;
587 	nserror error;
588 
589 	html = calloc(1, sizeof(html_content));
590 	if (html == NULL)
591 		return NSERROR_NOMEM;
592 
593 	error = content__init(&html->base, handler, imime_type, params,
594 			llcache, fallback_charset, quirks);
595 	if (error != NSERROR_OK) {
596 		free(html);
597 		return error;
598 	}
599 
600 	error = html_create_html_data(html, params);
601 	if (error != NSERROR_OK) {
602 		content_broadcast_error(&html->base, error, NULL);
603 		free(html);
604 		return error;
605 	}
606 
607 	error = html_css_new_stylesheets(html);
608 	if (error != NSERROR_OK) {
609 		content_broadcast_error(&html->base, error, NULL);
610 		free(html);
611 		return error;
612 	}
613 
614 	*c = (struct content *) html;
615 
616 	return NSERROR_OK;
617 }
618 
619 
620 
621 static nserror
html_process_encoding_change(struct content * c,const char * data,unsigned int size)622 html_process_encoding_change(struct content *c,
623 			     const char *data,
624 			     unsigned int size)
625 {
626 	html_content *html = (html_content *) c;
627 	dom_hubbub_parser_params parse_params;
628 	dom_hubbub_error error;
629 	const char *encoding;
630 	const uint8_t *source_data;
631 	size_t source_size;
632 
633 	/* Retrieve new encoding */
634 	encoding = dom_hubbub_parser_get_encoding(html->parser,
635 						  &html->encoding_source);
636 	if (encoding == NULL) {
637 		return NSERROR_NOMEM;
638 	}
639 
640 	if (html->encoding != NULL) {
641 		free(html->encoding);
642 		html->encoding = NULL;
643 	}
644 
645 	html->encoding = strdup(encoding);
646 	if (html->encoding == NULL) {
647 		return NSERROR_NOMEM;
648 	}
649 
650 	/* Destroy binding */
651 	dom_hubbub_parser_destroy(html->parser);
652 	html->parser = NULL;
653 
654 	if (html->document != NULL) {
655 		dom_node_unref(html->document);
656 	}
657 
658 	parse_params.enc = html->encoding;
659 	parse_params.fix_enc = true;
660 	parse_params.enable_script = html->enable_scripting;
661 	parse_params.msg = NULL;
662 	parse_params.script = html_process_script;
663 	parse_params.ctx = html;
664 	parse_params.daf = html_dom_event_fetcher;
665 
666 	/* Create new binding, using the new encoding */
667 	error = dom_hubbub_parser_create(&parse_params,
668 					 &html->parser,
669 					 &html->document);
670 	if (error != DOM_HUBBUB_OK) {
671 		/* Ok, we don't support the declared encoding. Bailing out
672 		 * isn't exactly user-friendly, so fall back to Windows-1252 */
673 		free(html->encoding);
674 		html->encoding = strdup("Windows-1252");
675 		if (html->encoding == NULL) {
676 			return NSERROR_NOMEM;
677 		}
678 		parse_params.enc = html->encoding;
679 
680 		error = dom_hubbub_parser_create(&parse_params,
681 						 &html->parser,
682 						 &html->document);
683 
684 		if (error != DOM_HUBBUB_OK) {
685 			return libdom_hubbub_error_to_nserror(error);
686 		}
687 
688 	}
689 
690 	source_data = content__get_source_data(c, &source_size);
691 
692 	/* Reprocess all the data.  This is safe because
693 	 * the encoding is now specified at parser start which means
694 	 * it cannot be changed again.
695 	 */
696 	error = dom_hubbub_parser_parse_chunk(html->parser,
697 					      source_data,
698 					      source_size);
699 
700 	return libdom_hubbub_error_to_nserror(error);
701 }
702 
703 
704 /**
705  * Process data for CONTENT_HTML.
706  */
707 
708 static bool
html_process_data(struct content * c,const char * data,unsigned int size)709 html_process_data(struct content *c, const char *data, unsigned int size)
710 {
711 	html_content *html = (html_content *) c;
712 	dom_hubbub_error dom_ret;
713 	nserror err = NSERROR_OK; /* assume its all going to be ok */
714 
715 	dom_ret = dom_hubbub_parser_parse_chunk(html->parser,
716 					      (const uint8_t *) data,
717 					      size);
718 
719 	err = libdom_hubbub_error_to_nserror(dom_ret);
720 
721 	/* deal with encoding change */
722 	if (err == NSERROR_ENCODING_CHANGE) {
723 		 err = html_process_encoding_change(c, data, size);
724 	}
725 
726 	/* broadcast the error if necessary */
727 	if (err != NSERROR_OK) {
728 		content_broadcast_error(c, err, NULL);
729 		return false;
730 	}
731 
732 	return true;
733 }
734 
735 
736 /**
737  * Convert a CONTENT_HTML for display.
738  *
739  * The following steps are carried out in order:
740  *
741  *  - parsing to an XML tree is completed
742  *  - stylesheets are fetched
743  *  - the XML tree is converted to a box tree and object fetches are started
744  *
745  * On exit, the content status will be either CONTENT_STATUS_DONE if the
746  * document is completely loaded or CONTENT_STATUS_READY if objects are still
747  * being fetched.
748  */
749 
html_convert(struct content * c)750 static bool html_convert(struct content *c)
751 {
752 	html_content *htmlc = (html_content *) c;
753 	dom_exception exc; /* returned by libdom functions */
754 
755 	/* The quirk check and associated stylesheet fetch is "safe"
756 	 * once the root node has been inserted into the document
757 	 * which must have happened by this point in the parse.
758 	 *
759 	 * faliure to retrive the quirk mode or to start the
760 	 * stylesheet fetch is non fatal as this "only" affects the
761 	 * render and it would annoy the user to fail the entire
762 	 * render for want of a quirks stylesheet.
763 	 */
764 	exc = dom_document_get_quirks_mode(htmlc->document, &htmlc->quirks);
765 	if (exc == DOM_NO_ERR) {
766 		html_css_quirks_stylesheets(htmlc);
767 		NSLOG(netsurf, INFO, "quirks set to %d", htmlc->quirks);
768 	}
769 
770 	htmlc->base.active--; /* the html fetch is no longer active */
771 	NSLOG(netsurf, INFO, "%d fetches active (%p)", htmlc->base.active, c);
772 
773 	/* The parse cannot be completed here because it may be paused
774 	 * untill all the resources being fetched have completed.
775 	 */
776 
777 	/* if there are no active fetches in progress no scripts are
778 	 * being fetched or they completed already.
779 	 */
780 	if (html_can_begin_conversion(htmlc)) {
781 		return html_begin_conversion(htmlc);
782 	}
783 	return true;
784 }
785 
786 /* Exported interface documented in html_internal.h */
html_can_begin_conversion(html_content * htmlc)787 bool html_can_begin_conversion(html_content *htmlc)
788 {
789 	unsigned int i;
790 
791 	/* Cannot begin conversion if we're still fetching stuff */
792 	if (htmlc->base.active != 0)
793 		return false;
794 
795 	for (i = 0; i != htmlc->stylesheet_count; i++) {
796 		/* Cannot begin conversion if the stylesheets are modified */
797 		if (htmlc->stylesheets[i].modified)
798 			return false;
799 	}
800 
801 	/* All is good, begin */
802 	return true;
803 }
804 
805 bool
html_begin_conversion(html_content * htmlc)806 html_begin_conversion(html_content *htmlc)
807 {
808 	dom_node *html;
809 	nserror ns_error;
810 	struct form *f;
811 	dom_exception exc; /* returned by libdom functions */
812 	dom_string *node_name = NULL;
813 	dom_hubbub_error error;
814 
815 	/* The act of completing the parse can result in additional data
816 	 * being flushed through the parser. This may result in new style or
817 	 * script nodes, upon which the conversion depends. Thus, once we
818 	 * have completed the parse, we must check again to see if we can
819 	 * begin the conversion. If we can't, we must stop and wait for the
820 	 * new styles/scripts to be processed. Once they have been processed,
821 	 * we will be called again to begin the conversion for real. Thus,
822 	 * we must also ensure that we don't attempt to complete the parse
823 	 * multiple times, so store a flag to indicate that parsing is
824 	 * complete to avoid repeating the completion pointlessly.
825 	 */
826 	if (htmlc->parse_completed == false) {
827 		NSLOG(netsurf, INFO, "Completing parse (%p)", htmlc);
828 		/* complete parsing */
829 		error = dom_hubbub_parser_completed(htmlc->parser);
830 		if (error == DOM_HUBBUB_HUBBUB_ERR_PAUSED && htmlc->base.active > 0) {
831 			/* The act of completing the parse failed because we've
832 			 * encountered a sync script which needs to run
833 			 */
834 			NSLOG(netsurf, INFO, "Completing parse brought synchronous JS to light, cannot complete yet");
835 			return true;
836 		}
837 		if (error != DOM_HUBBUB_OK) {
838 			NSLOG(netsurf, INFO, "Parsing failed");
839 
840 			content_broadcast_error(&htmlc->base,
841 						libdom_hubbub_error_to_nserror(error),
842 						NULL);
843 
844 			return false;
845 		}
846 		htmlc->parse_completed = true;
847 	}
848 
849 	if (html_can_begin_conversion(htmlc) == false) {
850 		NSLOG(netsurf, INFO, "Can't begin conversion (%p)", htmlc);
851 		/* We can't proceed (see commentary above) */
852 		return true;
853 	}
854 
855 	/* Give up processing if we've been aborted */
856 	if (htmlc->aborted) {
857 		NSLOG(netsurf, INFO, "Conversion aborted (%p) (active: %u)",
858 		      htmlc, htmlc->base.active);
859 		content_set_error(&htmlc->base);
860 		content_broadcast_error(&htmlc->base, NSERROR_STOPPED, NULL);
861 		return false;
862 	}
863 
864 	/* Conversion begins proper at this point */
865 	htmlc->conversion_begun = true;
866 
867 	/* complete script execution, including deferred scripts */
868 	html_script_exec(htmlc, true);
869 
870 	/* fire a simple event that bubbles named DOMContentLoaded at
871 	 * the Document.
872 	 */
873 
874 	/* get encoding */
875 	if (htmlc->encoding == NULL) {
876 		const char *encoding;
877 
878 		encoding = dom_hubbub_parser_get_encoding(htmlc->parser,
879 					&htmlc->encoding_source);
880 		if (encoding == NULL) {
881 			content_broadcast_error(&htmlc->base,
882 						NSERROR_NOMEM,
883 						NULL);
884 			return false;
885 		}
886 
887 		htmlc->encoding = strdup(encoding);
888 		if (htmlc->encoding == NULL) {
889 			content_broadcast_error(&htmlc->base,
890 						NSERROR_NOMEM,
891 						NULL);
892 			return false;
893 		}
894 	}
895 
896 	/* locate root element and ensure it is html */
897 	exc = dom_document_get_document_element(htmlc->document, (void *) &html);
898 	if ((exc != DOM_NO_ERR) || (html == NULL)) {
899 		NSLOG(netsurf, INFO, "error retrieving html element from dom");
900 		content_broadcast_error(&htmlc->base, NSERROR_DOM, NULL);
901 		return false;
902 	}
903 
904 	exc = dom_node_get_node_name(html, &node_name);
905 	if ((exc != DOM_NO_ERR) ||
906 	    (node_name == NULL) ||
907 	    (!dom_string_caseless_lwc_isequal(node_name,
908 			corestring_lwc_html))) {
909 		NSLOG(netsurf, INFO, "root element not html");
910 		content_broadcast_error(&htmlc->base, NSERROR_DOM, NULL);
911 		dom_node_unref(html);
912 		return false;
913 	}
914 	dom_string_unref(node_name);
915 
916 	/* Retrieve forms from parser */
917 	htmlc->forms = html_forms_get_forms(htmlc->encoding,
918 			(dom_html_document *) htmlc->document);
919 	for (f = htmlc->forms; f != NULL; f = f->prev) {
920 		nsurl *action;
921 
922 		/* Make all actions absolute */
923 		if (f->action == NULL || f->action[0] == '\0') {
924 			/* HTML5 4.10.22.3 step 9 */
925 			nsurl *doc_addr = content_get_url(&htmlc->base);
926 			ns_error = nsurl_join(htmlc->base_url,
927 					      nsurl_access(doc_addr),
928 					      &action);
929 		} else {
930 			ns_error = nsurl_join(htmlc->base_url,
931 					      f->action,
932 					      &action);
933 		}
934 
935 		if (ns_error != NSERROR_OK) {
936 			content_broadcast_error(&htmlc->base, ns_error, NULL);
937 
938 			dom_node_unref(html);
939 			return false;
940 		}
941 
942 		free(f->action);
943 		f->action = strdup(nsurl_access(action));
944 		nsurl_unref(action);
945 		if (f->action == NULL) {
946 			content_broadcast_error(&htmlc->base,
947 						NSERROR_NOMEM,
948 						NULL);
949 
950 			dom_node_unref(html);
951 			return false;
952 		}
953 
954 		/* Ensure each form has a document encoding */
955 		if (f->document_charset == NULL) {
956 			f->document_charset = strdup(htmlc->encoding);
957 			if (f->document_charset == NULL) {
958 				content_broadcast_error(&htmlc->base,
959 							NSERROR_NOMEM,
960 							NULL);
961 				dom_node_unref(html);
962 				return false;
963 			}
964 		}
965 	}
966 
967 	dom_node_unref(html);
968 
969 	if (htmlc->base.active == 0) {
970 		html_finish_conversion(htmlc);
971 	}
972 
973 	return true;
974 }
975 
976 
977 /**
978  * Stop loading a CONTENT_HTML.
979  *
980  * called when the content is aborted. This must clean up any state
981  * created during the fetch.
982  */
983 
html_stop(struct content * c)984 static void html_stop(struct content *c)
985 {
986 	html_content *htmlc = (html_content *) c;
987 
988 	switch (c->status) {
989 	case CONTENT_STATUS_LOADING:
990 		/* Still loading; simply flag that we've been aborted
991 		 * html_convert/html_finish_conversion will do the rest */
992 		htmlc->aborted = true;
993 		if (htmlc->jsthread != NULL) {
994 			/* Close the JS thread to cancel out any callbacks */
995 			js_closethread(htmlc->jsthread);
996 		}
997 		break;
998 
999 	case CONTENT_STATUS_READY:
1000 		html_object_abort_objects(htmlc);
1001 
1002 		/* If there are no further active fetches and we're still
1003 		 * in the READY state, transition to the DONE state. */
1004 		if (c->status == CONTENT_STATUS_READY && c->active == 0) {
1005 			content_set_done(c);
1006 		}
1007 
1008 		break;
1009 
1010 	case CONTENT_STATUS_DONE:
1011 		/* Nothing to do */
1012 		break;
1013 
1014 	default:
1015 		NSLOG(netsurf, INFO, "Unexpected status %d (%p)", c->status,
1016 		      c);
1017 		assert(0);
1018 	}
1019 }
1020 
1021 
1022 /**
1023  * Reformat a CONTENT_HTML to a new width.
1024  */
1025 
html_reformat(struct content * c,int width,int height)1026 static void html_reformat(struct content *c, int width, int height)
1027 {
1028 	html_content *htmlc = (html_content *) c;
1029 	struct box *layout;
1030 	uint64_t ms_before;
1031 	uint64_t ms_after;
1032 	uint64_t ms_interval;
1033 
1034 	nsu_getmonotonic_ms(&ms_before);
1035 
1036 	htmlc->reflowing = true;
1037 
1038 	htmlc->len_ctx.vw = nscss_pixels_physical_to_css(INTTOFIX(width));
1039 	htmlc->len_ctx.vh = nscss_pixels_physical_to_css(INTTOFIX(height));
1040 	htmlc->len_ctx.root_style = htmlc->layout->style;
1041 
1042 	layout_document(htmlc, width, height);
1043 	layout = htmlc->layout;
1044 
1045 	/* width and height are at least margin box of document */
1046 	c->width = layout->x + layout->padding[LEFT] + layout->width +
1047 		layout->padding[RIGHT] + layout->border[RIGHT].width +
1048 		layout->margin[RIGHT];
1049 	c->height = layout->y + layout->padding[TOP] + layout->height +
1050 		layout->padding[BOTTOM] + layout->border[BOTTOM].width +
1051 		layout->margin[BOTTOM];
1052 
1053 	/* if boxes overflow right or bottom edge, expand to contain it */
1054 	if (c->width < layout->x + layout->descendant_x1)
1055 		c->width = layout->x + layout->descendant_x1;
1056 	if (c->height < layout->y + layout->descendant_y1)
1057 		c->height = layout->y + layout->descendant_y1;
1058 
1059 	selection_reinit(htmlc->sel);
1060 
1061 	htmlc->reflowing = false;
1062 	htmlc->had_initial_layout = true;
1063 
1064 	/* calculate next reflow time at three times what it took to reflow */
1065 	nsu_getmonotonic_ms(&ms_after);
1066 
1067 	ms_interval = (ms_after - ms_before) * 3;
1068 	if (ms_interval < (nsoption_uint(min_reflow_period) * 10)) {
1069 		ms_interval = nsoption_uint(min_reflow_period) * 10;
1070 	}
1071 	c->reformat_time = ms_after + ms_interval;
1072 }
1073 
1074 
1075 /**
1076  * Redraw a box.
1077  *
1078  * \param  h	content containing the box, of type CONTENT_HTML
1079  * \param  box  box to redraw
1080  */
1081 
html_redraw_a_box(hlcache_handle * h,struct box * box)1082 void html_redraw_a_box(hlcache_handle *h, struct box *box)
1083 {
1084 	int x, y;
1085 
1086 	box_coords(box, &x, &y);
1087 
1088 	content_request_redraw(h, x, y,
1089 			box->padding[LEFT] + box->width + box->padding[RIGHT],
1090 			box->padding[TOP] + box->height + box->padding[BOTTOM]);
1091 }
1092 
1093 
1094 /**
1095  * Redraw a box.
1096  *
1097  * \param html  content containing the box, of type CONTENT_HTML
1098  * \param box  box to redraw.
1099  */
1100 
html__redraw_a_box(struct html_content * html,struct box * box)1101 void html__redraw_a_box(struct html_content *html, struct box *box)
1102 {
1103 	int x, y;
1104 
1105 	box_coords(box, &x, &y);
1106 
1107 	content__request_redraw((struct content *)html, x, y,
1108 			box->padding[LEFT] + box->width + box->padding[RIGHT],
1109 			box->padding[TOP] + box->height + box->padding[BOTTOM]);
1110 }
1111 
html_destroy_frameset(struct content_html_frames * frameset)1112 static void html_destroy_frameset(struct content_html_frames *frameset)
1113 {
1114 	int i;
1115 
1116 	if (frameset->name) {
1117 		talloc_free(frameset->name);
1118 		frameset->name = NULL;
1119 	}
1120 	if (frameset->url) {
1121 		talloc_free(frameset->url);
1122 		frameset->url = NULL;
1123 	}
1124 	if (frameset->children) {
1125 		for (i = 0; i < (frameset->rows * frameset->cols); i++) {
1126 			if (frameset->children[i].name) {
1127 				talloc_free(frameset->children[i].name);
1128 				frameset->children[i].name = NULL;
1129 			}
1130 			if (frameset->children[i].url) {
1131 				nsurl_unref(frameset->children[i].url);
1132 				frameset->children[i].url = NULL;
1133 			}
1134 			if (frameset->children[i].children)
1135 				html_destroy_frameset(&frameset->children[i]);
1136 		}
1137 		talloc_free(frameset->children);
1138 		frameset->children = NULL;
1139 	}
1140 }
1141 
html_destroy_iframe(struct content_html_iframe * iframe)1142 static void html_destroy_iframe(struct content_html_iframe *iframe)
1143 {
1144 	struct content_html_iframe *next;
1145 	next = iframe;
1146 	while ((iframe = next) != NULL) {
1147 		next = iframe->next;
1148 		if (iframe->name)
1149 			talloc_free(iframe->name);
1150 		if (iframe->url) {
1151 			nsurl_unref(iframe->url);
1152 			iframe->url = NULL;
1153 		}
1154 		talloc_free(iframe);
1155 	}
1156 }
1157 
1158 
/**
 * Free the box tree belonging to an HTML content.
 *
 * \param htmlc  HTML content whose layout is to be freed
 */
static void html_free_layout(html_content *htmlc)
{
	if (htmlc->bctx == NULL) {
		return;
	}

	/* the boxes are expected to hang off this talloc context, so
	 * freeing it should take the whole box set with it
	 */
	talloc_free(htmlc->bctx);
}
1168 
1169 /**
1170  * Destroy a CONTENT_HTML and free all resources it owns.
1171  */
1172 
html_destroy(struct content * c)1173 static void html_destroy(struct content *c)
1174 {
1175 	html_content *html = (html_content *) c;
1176 	struct form *f, *g;
1177 
1178 	NSLOG(netsurf, INFO, "content %p", c);
1179 
1180 	/* If we're still converting a layout, cancel it */
1181 	if (html->box_conversion_context != NULL) {
1182 		if (cancel_dom_to_box(html->box_conversion_context) != NSERROR_OK) {
1183 			NSLOG(netsurf, CRITICAL, "WARNING, Unable to cancel conversion context, browser may crash");
1184 		}
1185 	}
1186 
1187 	selection_destroy(html->sel);
1188 
1189 	/* Destroy forms */
1190 	for (f = html->forms; f != NULL; f = g) {
1191 		g = f->prev;
1192 
1193 		form_free(f);
1194 	}
1195 
1196 	imagemap_destroy(html);
1197 
1198 	if (c->refresh)
1199 		nsurl_unref(c->refresh);
1200 
1201 	if (html->base_url)
1202 		nsurl_unref(html->base_url);
1203 
1204 	/* At this point we can be moderately confident the JS is offline
1205 	 * so we destroy the JS thread.
1206 	 */
1207 	if (html->jsthread != NULL) {
1208 		js_destroythread(html->jsthread);
1209 		html->jsthread = NULL;
1210 	}
1211 
1212 	if (html->parser != NULL) {
1213 		dom_hubbub_parser_destroy(html->parser);
1214 		html->parser = NULL;
1215 	}
1216 
1217 	if (html->document != NULL) {
1218 		dom_node_unref(html->document);
1219 		html->document = NULL;
1220 	}
1221 
1222 	if (html->title != NULL) {
1223 		dom_node_unref(html->title);
1224 		html->title = NULL;
1225 	}
1226 
1227 	/* Free encoding */
1228 	if (html->encoding != NULL) {
1229 		free(html->encoding);
1230 		html->encoding = NULL;
1231 	}
1232 
1233 	/* Free base target */
1234 	if (html->base_target != NULL) {
1235 		free(html->base_target);
1236 		html->base_target = NULL;
1237 	}
1238 
1239 	/* Free frameset */
1240 	if (html->frameset != NULL) {
1241 		html_destroy_frameset(html->frameset);
1242 		talloc_free(html->frameset);
1243 		html->frameset = NULL;
1244 	}
1245 
1246 	/* Free iframes */
1247 	if (html->iframe != NULL) {
1248 		html_destroy_iframe(html->iframe);
1249 		html->iframe = NULL;
1250 	}
1251 
1252 	/* Destroy selection context */
1253 	if (html->select_ctx != NULL) {
1254 		css_select_ctx_destroy(html->select_ctx);
1255 		html->select_ctx = NULL;
1256 	}
1257 
1258 	if (html->universal != NULL) {
1259 		lwc_string_unref(html->universal);
1260 		html->universal = NULL;
1261 	}
1262 
1263 	/* Free stylesheets */
1264 	html_css_free_stylesheets(html);
1265 
1266 	/* Free scripts */
1267 	html_script_free(html);
1268 
1269 	/* Free objects */
1270 	html_object_free_objects(html);
1271 
1272 	/* free layout */
1273 	html_free_layout(html);
1274 }
1275 
1276 
html_clone(const struct content * old,struct content ** newc)1277 static nserror html_clone(const struct content *old, struct content **newc)
1278 {
1279 	/** \todo Clone HTML specifics */
1280 
1281 	/* In the meantime, we should never be called, as HTML contents
1282 	 * cannot be shared and we're not intending to fix printing's
1283 	 * cloning of documents. */
1284 	assert(0 && "html_clone should never be called");
1285 
1286 	return true;
1287 }
1288 
1289 
1290 /**
1291  * Handle a window containing a CONTENT_HTML being opened.
1292  */
1293 
1294 static nserror
html_open(struct content * c,struct browser_window * bw,struct content * page,struct object_params * params)1295 html_open(struct content *c,
1296 	  struct browser_window *bw,
1297 	  struct content *page,
1298 	  struct object_params *params)
1299 {
1300 	html_content *html = (html_content *) c;
1301 
1302 	html->bw = bw;
1303 	html->page = (html_content *) page;
1304 
1305 	html->drag_type = HTML_DRAG_NONE;
1306 	html->drag_owner.no_owner = true;
1307 
1308 	/* text selection */
1309 	selection_init(html->sel);
1310 	html->selection_type = HTML_SELECTION_NONE;
1311 	html->selection_owner.none = true;
1312 
1313 	html_object_open_objects(html, bw);
1314 
1315 	return NSERROR_OK;
1316 }
1317 
1318 
1319 /**
1320  * Handle a window containing a CONTENT_HTML being closed.
1321  */
1322 
html_close(struct content * c)1323 static nserror html_close(struct content *c)
1324 {
1325 	html_content *htmlc = (html_content *) c;
1326 	nserror ret = NSERROR_OK;
1327 
1328 	selection_clear(htmlc->sel, false);
1329 
1330 	/* clear the html content reference to the browser window */
1331 	htmlc->bw = NULL;
1332 
1333 	/* remove all object references from the html content */
1334 	html_object_close_objects(htmlc);
1335 
1336 	if (htmlc->jsthread != NULL) {
1337 		/* Close, but do not destroy (yet) the JS thread */
1338 		ret = js_closethread(htmlc->jsthread);
1339 	}
1340 
1341 	return ret;
1342 }
1343 
1344 
1345 /**
1346  * Return an HTML content's selection context
1347  */
1348 
html_clear_selection(struct content * c)1349 static void html_clear_selection(struct content *c)
1350 {
1351 	html_content *html = (html_content *) c;
1352 
1353 	switch (html->selection_type) {
1354 	case HTML_SELECTION_NONE:
1355 		/* Nothing to do */
1356 		assert(html->selection_owner.none == true);
1357 		break;
1358 	case HTML_SELECTION_TEXTAREA:
1359 		textarea_clear_selection(html->selection_owner.textarea->
1360 				gadget->data.text.ta);
1361 		break;
1362 	case HTML_SELECTION_SELF:
1363 		assert(html->selection_owner.none == false);
1364 		selection_clear(html->sel, true);
1365 		break;
1366 	case HTML_SELECTION_CONTENT:
1367 		content_clear_selection(html->selection_owner.content->object);
1368 		break;
1369 	default:
1370 		break;
1371 	}
1372 
1373 	/* There is no selection now. */
1374 	html->selection_type = HTML_SELECTION_NONE;
1375 	html->selection_owner.none = true;
1376 }
1377 
1378 
1379 /**
1380  * Return an HTML content's selection context
1381  */
1382 
html_get_selection(struct content * c)1383 static char *html_get_selection(struct content *c)
1384 {
1385 	html_content *html = (html_content *) c;
1386 
1387 	switch (html->selection_type) {
1388 	case HTML_SELECTION_TEXTAREA:
1389 		return textarea_get_selection(html->selection_owner.textarea->
1390 				gadget->data.text.ta);
1391 	case HTML_SELECTION_SELF:
1392 		assert(html->selection_owner.none == false);
1393 		return selection_get_copy(html->sel);
1394 	case HTML_SELECTION_CONTENT:
1395 		return content_get_selection(
1396 				html->selection_owner.content->object);
1397 	case HTML_SELECTION_NONE:
1398 		/* Nothing to do */
1399 		assert(html->selection_owner.none == true);
1400 		break;
1401 	default:
1402 		break;
1403 	}
1404 
1405 	return NULL;
1406 }
1407 
1408 
1409 /**
1410  * Get access to any content, link URLs and objects (images) currently
1411  * at the given (x, y) coordinates.
1412  *
1413  * \param[in] c html content to look inside
1414  * \param[in] x x-coordinate of point of interest
1415  * \param[in] y y-coordinate of point of interest
1416  * \param[out] data Positional features struct to be updated with any
1417  *             relevent content, or set to NULL if none.
1418  * \return NSERROR_OK on success else appropriate error code.
1419  */
1420 static nserror
html_get_contextual_content(struct content * c,int x,int y,struct browser_window_features * data)1421 html_get_contextual_content(struct content *c, int x, int y,
1422 			    struct browser_window_features *data)
1423 {
1424 	html_content *html = (html_content *) c;
1425 
1426 	struct box *box = html->layout;
1427 	struct box *next;
1428 	int box_x = 0, box_y = 0;
1429 
1430 	while ((next = box_at_point(&html->len_ctx, box, x, y,
1431 			&box_x, &box_y)) != NULL) {
1432 		box = next;
1433 
1434 		/* hidden boxes are ignored */
1435 		if ((box->style != NULL) &&
1436 		    css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) {
1437 			continue;
1438 		}
1439 
1440 		if (box->iframe) {
1441 			float scale = browser_window_get_scale(box->iframe);
1442 			browser_window_get_features(box->iframe,
1443 						    (x - box_x) * scale,
1444 						    (y - box_y) * scale,
1445 						    data);
1446 		}
1447 
1448 		if (box->object)
1449 			content_get_contextual_content(box->object,
1450 					x - box_x, y - box_y, data);
1451 
1452 		if (box->object)
1453 			data->object = box->object;
1454 
1455 		if (box->href)
1456 			data->link = box->href;
1457 
1458 		if (box->usemap) {
1459 			const char *target = NULL;
1460 			nsurl *url = imagemap_get(html, box->usemap, box_x,
1461 					box_y, x, y, &target);
1462 			/* Box might have imagemap, but no actual link area
1463 			 * at point */
1464 			if (url != NULL)
1465 				data->link = url;
1466 		}
1467 		if (box->gadget) {
1468 			switch (box->gadget->type) {
1469 			case GADGET_TEXTBOX:
1470 			case GADGET_TEXTAREA:
1471 			case GADGET_PASSWORD:
1472 				data->form_features = CTX_FORM_TEXT;
1473 				break;
1474 
1475 			case GADGET_FILE:
1476 				data->form_features = CTX_FORM_FILE;
1477 				break;
1478 
1479 			default:
1480 				data->form_features = CTX_FORM_NONE;
1481 				break;
1482 			}
1483 		}
1484 	}
1485 	return NSERROR_OK;
1486 }
1487 
1488 
1489 /**
1490  * Scroll deepest thing within the content which can be scrolled at given point
1491  *
1492  * \param c	html content to look inside
1493  * \param x	x-coordinate of point of interest
1494  * \param y	y-coordinate of point of interest
1495  * \param scrx	number of px try to scroll something in x direction
1496  * \param scry	number of px try to scroll something in y direction
1497  * \return true iff scroll was consumed by something in the content
1498  */
1499 static bool
html_scroll_at_point(struct content * c,int x,int y,int scrx,int scry)1500 html_scroll_at_point(struct content *c, int x, int y, int scrx, int scry)
1501 {
1502 	html_content *html = (html_content *) c;
1503 
1504 	struct box *box = html->layout;
1505 	struct box *next;
1506 	int box_x = 0, box_y = 0;
1507 	bool handled_scroll = false;
1508 
1509 	/* TODO: invert order; visit deepest box first */
1510 
1511 	while ((next = box_at_point(&html->len_ctx, box, x, y,
1512 			&box_x, &box_y)) != NULL) {
1513 		box = next;
1514 
1515 		if (box->style && css_computed_visibility(box->style) ==
1516 				CSS_VISIBILITY_HIDDEN)
1517 			continue;
1518 
1519 		/* Pass into iframe */
1520 		if (box->iframe) {
1521 			float scale = browser_window_get_scale(box->iframe);
1522 
1523 			if (browser_window_scroll_at_point(box->iframe,
1524 							   (x - box_x) * scale,
1525 							   (y - box_y) * scale,
1526 							   scrx, scry) == true)
1527 				return true;
1528 		}
1529 
1530 		/* Pass into textarea widget */
1531 		if (box->gadget && (box->gadget->type == GADGET_TEXTAREA ||
1532 				box->gadget->type == GADGET_PASSWORD ||
1533 				box->gadget->type == GADGET_TEXTBOX) &&
1534 				textarea_scroll(box->gadget->data.text.ta,
1535 						scrx, scry) == true)
1536 			return true;
1537 
1538 		/* Pass into object */
1539 		if (box->object != NULL && content_scroll_at_point(
1540 				box->object, x - box_x, y - box_y,
1541 				scrx, scry) == true)
1542 			return true;
1543 
1544 		/* Handle box scrollbars */
1545 		if (box->scroll_y && scrollbar_scroll(box->scroll_y, scry))
1546 			handled_scroll = true;
1547 
1548 		if (box->scroll_x && scrollbar_scroll(box->scroll_x, scrx))
1549 			handled_scroll = true;
1550 
1551 		if (handled_scroll == true)
1552 			return true;
1553 	}
1554 
1555 	return false;
1556 }
1557 
1558 /** Helper for file gadgets to store their filename unencoded on the
1559  * dom node associated with the gadget.
1560  *
1561  * \todo Get rid of this crap eventually
1562  */
html__dom_user_data_handler(dom_node_operation operation,dom_string * key,void * _data,struct dom_node * src,struct dom_node * dst)1563 static void html__dom_user_data_handler(dom_node_operation operation,
1564 		dom_string *key, void *_data, struct dom_node *src,
1565 		struct dom_node *dst)
1566 {
1567 	char *oldfile;
1568 	char *data = (char *)_data;
1569 
1570 	if (!dom_string_isequal(corestring_dom___ns_key_file_name_node_data,
1571 				key) || data == NULL) {
1572 		return;
1573 	}
1574 
1575 	switch (operation) {
1576 	case DOM_NODE_CLONED:
1577 		if (dom_node_set_user_data(dst,
1578 					   corestring_dom___ns_key_file_name_node_data,
1579 					   strdup(data), html__dom_user_data_handler,
1580 					   &oldfile) == DOM_NO_ERR) {
1581 			if (oldfile != NULL)
1582 				free(oldfile);
1583 		}
1584 		break;
1585 
1586 	case DOM_NODE_RENAMED:
1587 	case DOM_NODE_IMPORTED:
1588 	case DOM_NODE_ADOPTED:
1589 		break;
1590 
1591 	case DOM_NODE_DELETED:
1592 		free(data);
1593 		break;
1594 	default:
1595 		NSLOG(netsurf, INFO, "User data operation not handled.");
1596 		assert(0);
1597 	}
1598 }
1599 
html__set_file_gadget_filename(struct content * c,struct form_control * gadget,const char * fn)1600 static void html__set_file_gadget_filename(struct content *c,
1601 	struct form_control *gadget, const char *fn)
1602 {
1603 	nserror ret;
1604 	char *utf8_fn, *oldfile = NULL;
1605 	html_content *html = (html_content *)c;
1606 	struct box *file_box = gadget->box;
1607 
1608 	ret = guit->utf8->local_to_utf8(fn, 0, &utf8_fn);
1609 	if (ret != NSERROR_OK) {
1610 		assert(ret != NSERROR_BAD_ENCODING);
1611 		NSLOG(netsurf, INFO,
1612 		      "utf8 to local encoding conversion failed");
1613 		/* Load was for us - just no memory */
1614 		return;
1615 	}
1616 
1617 	form_gadget_update_value(gadget, utf8_fn);
1618 
1619 	/* corestring_dom___ns_key_file_name_node_data */
1620 	if (dom_node_set_user_data((dom_node *)file_box->gadget->node,
1621 				   corestring_dom___ns_key_file_name_node_data,
1622 				   strdup(fn), html__dom_user_data_handler,
1623 				   &oldfile) == DOM_NO_ERR) {
1624 		if (oldfile != NULL)
1625 			free(oldfile);
1626 	}
1627 
1628 	/* Redraw box. */
1629 	html__redraw_a_box(html, file_box);
1630 }
1631 
/**
 * Set the file name of a file gadget, given a handle to its content.
 *
 * \param hl      handle of the html content containing the gadget
 * \param gadget  file gadget to update
 * \param fn      file name in the local encoding
 */
void html_set_file_gadget_filename(struct hlcache_handle *hl,
	struct form_control *gadget, const char *fn)
{
	/* A void function must not return an expression, so this is a
	 * plain call rather than `return f(...)`.
	 */
	html__set_file_gadget_filename(hlcache_handle_get_content(hl),
		gadget, fn);
}
1638 
1639 /**
1640  * Drop a file onto a content at a particular point, or determine if a file
1641  * may be dropped onto the content at given point.
1642  *
1643  * \param c	html content to look inside
1644  * \param x	x-coordinate of point of interest
1645  * \param y	y-coordinate of point of interest
1646  * \param file	path to file to be dropped, or NULL to know if drop allowed
1647  * \return true iff file drop has been handled, or if drop possible (NULL file)
1648  */
html_drop_file_at_point(struct content * c,int x,int y,char * file)1649 static bool html_drop_file_at_point(struct content *c, int x, int y, char *file)
1650 {
1651 	html_content *html = (html_content *) c;
1652 
1653 	struct box *box = html->layout;
1654 	struct box *next;
1655 	struct box *file_box = NULL;
1656 	struct box *text_box = NULL;
1657 	int box_x = 0, box_y = 0;
1658 
1659 	/* Scan box tree for boxes that can handle drop */
1660 	while ((next = box_at_point(&html->len_ctx, box, x, y,
1661 			&box_x, &box_y)) != NULL) {
1662 		box = next;
1663 
1664 		if (box->style &&
1665 		    css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN)
1666 			continue;
1667 
1668 		if (box->iframe) {
1669 			float scale = browser_window_get_scale(box->iframe);
1670 			return browser_window_drop_file_at_point(
1671 				box->iframe,
1672 				(x - box_x) * scale,
1673 				(y - box_y) * scale,
1674 				file);
1675 		}
1676 
1677 		if (box->object &&
1678 		    content_drop_file_at_point(box->object,
1679 					x - box_x, y - box_y, file) == true)
1680 			return true;
1681 
1682 		if (box->gadget) {
1683 			switch (box->gadget->type) {
1684 				case GADGET_FILE:
1685 					file_box = box;
1686 				break;
1687 
1688 				case GADGET_TEXTBOX:
1689 				case GADGET_TEXTAREA:
1690 				case GADGET_PASSWORD:
1691 					text_box = box;
1692 					break;
1693 
1694 				default:	/* appease compiler */
1695 					break;
1696 			}
1697 		}
1698 	}
1699 
1700 	if (!file_box && !text_box)
1701 		/* No box capable of handling drop */
1702 		return false;
1703 
1704 	if (file == NULL)
1705 		/* There is a box capable of handling drop here */
1706 		return true;
1707 
1708 	/* Handle the drop */
1709 	if (file_box) {
1710 		/* File dropped on file input */
1711 		html__set_file_gadget_filename(c, file_box->gadget, file);
1712 
1713 	} else {
1714 		/* File dropped on text input */
1715 
1716 		size_t file_len;
1717 		FILE *fp = NULL;
1718 		char *buffer;
1719 		char *utf8_buff;
1720 		nserror ret;
1721 		unsigned int size;
1722 		int bx, by;
1723 
1724 		/* Open file */
1725 		fp = fopen(file, "rb");
1726 		if (fp == NULL) {
1727 			/* Couldn't open file, but drop was for us */
1728 			return true;
1729 		}
1730 
1731 		/* Get filesize */
1732 		fseek(fp, 0, SEEK_END);
1733 		file_len = ftell(fp);
1734 		fseek(fp, 0, SEEK_SET);
1735 
1736 		if ((long)file_len == -1) {
1737 			/* unable to get file length, but drop was for us */
1738 			fclose(fp);
1739 			return true;
1740 		}
1741 
1742 		/* Allocate buffer for file data */
1743 		buffer = malloc(file_len + 1);
1744 		if (buffer == NULL) {
1745 			/* No memory, but drop was for us */
1746 			fclose(fp);
1747 			return true;
1748 		}
1749 
1750 		/* Stick file into buffer */
1751 		if (file_len != fread(buffer, 1, file_len, fp)) {
1752 			/* Failed, but drop was for us */
1753 			free(buffer);
1754 			fclose(fp);
1755 			return true;
1756 		}
1757 
1758 		/* Done with file */
1759 		fclose(fp);
1760 
1761 		/* Ensure buffer's string termination */
1762 		buffer[file_len] = '\0';
1763 
1764 		/* TODO: Sniff for text? */
1765 
1766 		/* Convert to UTF-8 */
1767 		ret = guit->utf8->local_to_utf8(buffer, file_len, &utf8_buff);
1768 		if (ret != NSERROR_OK) {
1769 			/* bad encoding shouldn't happen */
1770 			NSLOG(netsurf, ERROR,
1771 			      "local to utf8 encoding failed (%s)",
1772 			      messages_get_errorcode(ret));
1773 			assert(ret != NSERROR_BAD_ENCODING);
1774 			free(buffer);
1775 			return true;
1776 		}
1777 
1778 		/* Done with buffer */
1779 		free(buffer);
1780 
1781 		/* Get new length */
1782 		size = strlen(utf8_buff);
1783 
1784 		/* Simulate a click over the input box, to place caret */
1785 		box_coords(text_box, &bx, &by);
1786 		textarea_mouse_action(text_box->gadget->data.text.ta,
1787 				BROWSER_MOUSE_PRESS_1, x - bx, y - by);
1788 
1789 		/* Paste the file as text */
1790 		textarea_drop_text(text_box->gadget->data.text.ta,
1791 				utf8_buff, size);
1792 
1793 		free(utf8_buff);
1794 	}
1795 
1796 	return true;
1797 }
1798 
1799 
1800 /**
1801  * set debug status.
1802  *
1803  * \param c The content to debug
1804  * \param op The debug operation type
1805  */
1806 static nserror
html_debug(struct content * c,enum content_debug op)1807 html_debug(struct content *c, enum content_debug op)
1808 {
1809 	html_redraw_debug = !html_redraw_debug;
1810 
1811 	return NSERROR_OK;
1812 }
1813 
1814 
1815 /**
1816  * Dump debug info concerning the html_content
1817  *
1818  * \param c The content to debug
1819  * \param f The file to dump to
1820  * \param op The debug dump type
1821  */
1822 static nserror
html_debug_dump(struct content * c,FILE * f,enum content_debug op)1823 html_debug_dump(struct content *c, FILE *f, enum content_debug op)
1824 {
1825 	html_content *htmlc = (html_content *)c;
1826 	dom_node *html;
1827 	dom_exception exc; /* returned by libdom functions */
1828 	nserror ret;
1829 
1830 	assert(htmlc != NULL);
1831 
1832 	if (op == CONTENT_DEBUG_RENDER) {
1833 		assert(htmlc->layout != NULL);
1834 		box_dump(f, htmlc->layout, 0, true);
1835 		ret = NSERROR_OK;
1836 	} else {
1837 		if (htmlc->document == NULL) {
1838 			NSLOG(netsurf, INFO, "No document to dump");
1839 			return NSERROR_DOM;
1840 		}
1841 
1842 		exc = dom_document_get_document_element(htmlc->document, (void *) &html);
1843 		if ((exc != DOM_NO_ERR) || (html == NULL)) {
1844 			NSLOG(netsurf, INFO, "Unable to obtain root node");
1845 			return NSERROR_DOM;
1846 		}
1847 
1848 		ret = libdom_dump_structure(html, f, 0);
1849 
1850 		NSLOG(netsurf, INFO, "DOM structure dump returning %d", ret);
1851 
1852 		dom_node_unref(html);
1853 	}
1854 
1855 	return ret;
1856 }
1857 
1858 
#if ALWAYS_DUMP_FRAMESET
/**
 * Print a frameset tree to stderr.
 *
 * One line is printed for the given frame, followed by one line per
 * child (recursively), each prefixed by indentation and its (row col)
 * position.
 *
 * \param frame  frameset node to print
 * \param depth  indentation level applied to the lines of the children
 */

static void
html_dump_frameset(struct content_html_frames *frame, unsigned int depth)
{
	unsigned int i;
	int row, col, index;
	const char *unit[] = {"px", "%", "*"};
	const char *scrolling[] = {"auto", "yes", "no"};

	assert(frame);

	/* %p requires a void pointer argument */
	fprintf(stderr, "%p ", (void *) frame);

	fprintf(stderr, "(%i %i) ", frame->rows, frame->cols);

	fprintf(stderr, "w%g%s ", frame->width.value, unit[frame->width.unit]);
	fprintf(stderr, "h%g%s ", frame->height.value,unit[frame->height.unit]);
	fprintf(stderr, "(margin w%i h%i) ",
			frame->margin_width, frame->margin_height);

	if (frame->name)
		fprintf(stderr, "'%s' ", frame->name);
	if (frame->url) {
		/* the URL is an nsurl object, not a C string, so its text
		 * has to be obtained through the accessor
		 */
		fprintf(stderr, "<%s> ", nsurl_access(frame->url));
	}

	if (frame->no_resize)
		fprintf(stderr, "noresize ");
	fprintf(stderr, "(scrolling %s) ", scrolling[frame->scrolling]);
	if (frame->border)
		fprintf(stderr, "border %x ",
				(unsigned int) frame->border_colour);

	fprintf(stderr, "\n");

	if (frame->children) {
		/* children are stored row by row */
		for (row = 0; row != frame->rows; row++) {
			for (col = 0; col != frame->cols; col++) {
				for (i = 0; i != depth; i++)
					fprintf(stderr, "  ");
				fprintf(stderr, "(%i %i): ", row, col);
				index = (row * frame->cols) + col;
				html_dump_frameset(&frame->children[index],
						depth + 1);
			}
		}
	}
}

#endif
1912 
1913 /**
1914  * Retrieve HTML document tree
1915  *
1916  * \param h  HTML content to retrieve document tree from
1917  * \return Pointer to document tree
1918  */
html_get_document(hlcache_handle * h)1919 dom_document *html_get_document(hlcache_handle *h)
1920 {
1921 	html_content *c = (html_content *) hlcache_handle_get_content(h);
1922 
1923 	assert(c != NULL);
1924 
1925 	return c->document;
1926 }
1927 
1928 /**
1929  * Retrieve box tree
1930  *
1931  * \param h  HTML content to retrieve tree from
1932  * \return Pointer to box tree
1933  *
1934  * \todo This API must die, as must all use of the box tree outside of
1935  *         HTML content handler
1936  */
html_get_box_tree(hlcache_handle * h)1937 struct box *html_get_box_tree(hlcache_handle *h)
1938 {
1939 	html_content *c = (html_content *) hlcache_handle_get_content(h);
1940 
1941 	assert(c != NULL);
1942 
1943 	return c->layout;
1944 }
1945 
1946 /**
1947  * Retrieve the charset of an HTML document
1948  *
1949  * \param c Content to retrieve charset from
1950  * \param op The content encoding operation to perform.
1951  * \return Pointer to charset, or NULL
1952  */
html_encoding(const struct content * c,enum content_encoding_type op)1953 static const char *html_encoding(const struct content *c, enum content_encoding_type op)
1954 {
1955 	html_content *html = (html_content *) c;
1956 	static char enc_token[10] = "Encoding0";
1957 
1958 	assert(html != NULL);
1959 
1960 	if (op == CONTENT_ENCODING_SOURCE) {
1961 		enc_token[8] = '0' + html->encoding_source;
1962 		return messages_get(enc_token);
1963 	}
1964 
1965 	return html->encoding;
1966 }
1967 
1968 
1969 /**
1970  * Retrieve framesets used in an HTML document
1971  *
1972  * \param h  Content to inspect
1973  * \return Pointer to framesets, or NULL if none
1974  */
html_get_frameset(hlcache_handle * h)1975 struct content_html_frames *html_get_frameset(hlcache_handle *h)
1976 {
1977 	html_content *c = (html_content *) hlcache_handle_get_content(h);
1978 
1979 	assert(c != NULL);
1980 
1981 	return c->frameset;
1982 }
1983 
1984 /**
1985  * Retrieve iframes used in an HTML document
1986  *
1987  * \param h  Content to inspect
1988  * \return Pointer to iframes, or NULL if none
1989  */
html_get_iframe(hlcache_handle * h)1990 struct content_html_iframe *html_get_iframe(hlcache_handle *h)
1991 {
1992 	html_content *c = (html_content *) hlcache_handle_get_content(h);
1993 
1994 	assert(c != NULL);
1995 
1996 	return c->iframe;
1997 }
1998 
1999 /**
2000  * Retrieve an HTML content's base URL
2001  *
2002  * \param h  Content to retrieve base target from
2003  * \return Pointer to URL
2004  */
html_get_base_url(hlcache_handle * h)2005 nsurl *html_get_base_url(hlcache_handle *h)
2006 {
2007 	html_content *c = (html_content *) hlcache_handle_get_content(h);
2008 
2009 	assert(c != NULL);
2010 
2011 	return c->base_url;
2012 }
2013 
2014 /**
2015  * Retrieve an HTML content's base target
2016  *
2017  * \param h  Content to retrieve base target from
2018  * \return Pointer to target, or NULL if none
2019  */
html_get_base_target(hlcache_handle * h)2020 const char *html_get_base_target(hlcache_handle *h)
2021 {
2022 	html_content *c = (html_content *) hlcache_handle_get_content(h);
2023 
2024 	assert(c != NULL);
2025 
2026 	return c->base_target;
2027 }
2028 
2029 
2030 /**
2031  * Retrieve layout coordinates of box with given id
2032  *
2033  * \param h        HTML document to search
2034  * \param frag_id  String containing an element id
2035  * \param x        Updated to global x coord iff id found
2036  * \param y        Updated to global y coord iff id found
2037  * \return  true iff id found
2038  */
html_get_id_offset(hlcache_handle * h,lwc_string * frag_id,int * x,int * y)2039 bool html_get_id_offset(hlcache_handle *h, lwc_string *frag_id, int *x, int *y)
2040 {
2041 	struct box *pos;
2042 	struct box *layout;
2043 
2044 	if (content_get_type(h) != CONTENT_HTML)
2045 		return false;
2046 
2047 	layout = html_get_box_tree(h);
2048 
2049 	if ((pos = box_find_by_id(layout, frag_id)) != 0) {
2050 		box_coords(pos, x, y);
2051 		return true;
2052 	}
2053 	return false;
2054 }
2055 
html_exec(struct content * c,const char * src,size_t srclen)2056 bool html_exec(struct content *c, const char *src, size_t srclen)
2057 {
2058 	html_content *htmlc = (html_content *)c;
2059 	bool result = false;
2060 	dom_exception err;
2061 	dom_html_body_element *body_node;
2062 	dom_string *dom_src;
2063 	dom_text *text_node;
2064 	dom_node *spare_node;
2065 	dom_html_script_element *script_node;
2066 
2067 	if (htmlc->document == NULL) {
2068 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, no document");
2069 		goto out_no_string;
2070 	}
2071 
2072 	err = dom_string_create((const uint8_t *)src, srclen, &dom_src);
2073 	if (err != DOM_NO_ERR) {
2074 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create string");
2075 		goto out_no_string;
2076 	}
2077 
2078 	err = dom_html_document_get_body(htmlc->document, &body_node);
2079 	if (err != DOM_NO_ERR) {
2080 		NSLOG(netsurf, DEEPDEBUG, "Unable to retrieve body element");
2081 		goto out_no_body;
2082 	}
2083 
2084 	err = dom_document_create_text_node(htmlc->document, dom_src, &text_node);
2085 	if (err != DOM_NO_ERR) {
2086 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create text node");
2087 		goto out_no_text_node;
2088 	}
2089 
2090 	err = dom_document_create_element(htmlc->document, corestring_dom_SCRIPT, &script_node);
2091 	if (err != DOM_NO_ERR) {
2092 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create script node");
2093 		goto out_no_script_node;
2094 	}
2095 
2096 	err = dom_node_append_child(script_node, text_node, &spare_node);
2097 	if (err != DOM_NO_ERR) {
2098 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not insert code node into script node");
2099 		goto out_unparented;
2100 	}
2101 	dom_node_unref(spare_node); /* We do not need the spare ref at all */
2102 
2103 	err = dom_node_append_child(body_node, script_node, &spare_node);
2104 	if (err != DOM_NO_ERR) {
2105 		NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not insert script node into document body");
2106 		goto out_unparented;
2107 	}
2108 	dom_node_unref(spare_node); /* Again no need for the spare ref */
2109 
2110 	/* We successfully inserted the node into the DOM */
2111 
2112 	result = true;
2113 
2114 	/* Now we unwind, starting by removing the script from wherever it
2115 	 * ended up parented
2116 	 */
2117 
2118 	err = dom_node_get_parent_node(script_node, &spare_node);
2119 	if (err == DOM_NO_ERR && spare_node != NULL) {
2120 		dom_node *second_spare;
2121 		err = dom_node_remove_child(spare_node, script_node, &second_spare);
2122 		if (err == DOM_NO_ERR) {
2123 			dom_node_unref(second_spare);
2124 		}
2125 		dom_node_unref(spare_node);
2126 	}
2127 
2128 out_unparented:
2129 	dom_node_unref(script_node);
2130 out_no_script_node:
2131 	dom_node_unref(text_node);
2132 out_no_text_node:
2133 	dom_node_unref(body_node);
2134 out_no_body:
2135 	dom_string_unref(dom_src);
2136 out_no_string:
2137 	return result;
2138 }
2139 
2140 /* See \ref content_saw_insecure_objects */
2141 static bool
html_saw_insecure_objects(struct content * c)2142 html_saw_insecure_objects(struct content *c)
2143 {
2144 	html_content *htmlc = (html_content *)c;
2145 	struct content_html_object *obj = htmlc->object_list;
2146 
2147 	/* Check through the object list */
2148 	while (obj != NULL) {
2149 		if (obj->content != NULL) {
2150 			if (content_saw_insecure_objects(obj->content))
2151 				return true;
2152 		}
2153 		obj = obj->next;
2154 	}
2155 
2156 	/* Now check the script list */
2157 	if (html_saw_insecure_scripts(htmlc)) {
2158 		return true;
2159 	}
2160 
2161 	/* Now check stylesheets */
2162 	if (html_css_saw_insecure_stylesheets(htmlc)) {
2163 		return true;
2164 	}
2165 
2166 	return false;
2167 }
2168 
2169 /**
2170  * Compute the type of a content
2171  *
2172  * \return CONTENT_HTML
2173  */
html_content_type(void)2174 static content_type html_content_type(void)
2175 {
2176 	return CONTENT_HTML;
2177 }
2178 
2179 
/**
 * Finalise the HTML content handler
 *
 * All of the work is delegated to html_css_fini().
 */
static void
html_fini(void)
{
	html_css_fini();
}
2184 
2185 /**
2186  * Finds all occurrences of a given string in an html box
2187  *
2188  * \param pattern   the string pattern to search for
2189  * \param p_len     pattern length
2190  * \param cur       pointer to the current box
2191  * \param case_sens whether to perform a case sensitive search
2192  * \param context   The search context to add the entry to.
2193  * \return true on success, false on memory allocation failure
2194  */
2195 static nserror
find_occurrences_html_box(const char * pattern,int p_len,struct box * cur,bool case_sens,struct textsearch_context * context)2196 find_occurrences_html_box(const char *pattern,
2197 			  int p_len,
2198 			  struct box *cur,
2199 			  bool case_sens,
2200 			  struct textsearch_context *context)
2201 {
2202 	struct box *a;
2203 	nserror res = NSERROR_OK;
2204 
2205 	/* ignore this box, if there's no visible text */
2206 	if (!cur->object && cur->text) {
2207 		const char *text = cur->text;
2208 		unsigned length = cur->length;
2209 
2210 		while (length > 0) {
2211 			unsigned match_length;
2212 			unsigned match_offset;
2213 			const char *new_text;
2214 			const char *pos;
2215 
2216 			pos = content_textsearch_find_pattern(text,
2217 					   length,
2218 					   pattern,
2219 					   p_len,
2220 					   case_sens,
2221 					   &match_length);
2222 			if (!pos)
2223 				break;
2224 
2225 			/* found string in box => add to list */
2226 			match_offset = pos - cur->text;
2227 
2228 			res = content_textsearch_add_match(context,
2229 					cur->byte_offset + match_offset,
2230 					cur->byte_offset + match_offset + match_length,
2231 					cur,
2232 					cur);
2233 			if (res != NSERROR_OK) {
2234 				return res;
2235 			}
2236 
2237 			new_text = pos + match_length;
2238 			length -= (new_text - text);
2239 			text = new_text;
2240 		}
2241 	}
2242 
2243 	/* and recurse */
2244 	for (a = cur->children; a; a = a->next) {
2245 		res = find_occurrences_html_box(pattern,
2246 						p_len,
2247 						a,
2248 						case_sens,
2249 						context);
2250 		if (res != NSERROR_OK) {
2251 			return res;
2252 		}
2253 	}
2254 
2255 	return res;
2256 }
2257 
2258 /**
2259  * Finds all occurrences of a given string in the html box tree
2260  *
2261  * \param pattern   the string pattern to search for
2262  * \param p_len     pattern length
2263  * \param c The content to search
2264  * \param csens whether to perform a case sensitive search
2265  * \param context   The search context to add the entry to.
2266  * \return true on success, false on memory allocation failure
2267  */
2268 static nserror
html_textsearch_find(struct content * c,struct textsearch_context * context,const char * pattern,int p_len,bool csens)2269 html_textsearch_find(struct content *c,
2270 		     struct textsearch_context *context,
2271 		     const char *pattern,
2272 		     int p_len,
2273 		     bool csens)
2274 {
2275 	html_content *html = (html_content *)c;
2276 
2277 	if (html->layout == NULL) {
2278 		return NSERROR_INVALID;
2279 	}
2280 
2281 	return find_occurrences_html_box(pattern,
2282 					 p_len,
2283 					 html->layout,
2284 					 csens,
2285 					 context);
2286 }
2287 
2288 
2289 static nserror
html_textsearch_bounds(struct content * c,unsigned start_idx,unsigned end_idx,struct box * start_box,struct box * end_box,struct rect * bounds)2290 html_textsearch_bounds(struct content *c,
2291 		       unsigned start_idx,
2292 		       unsigned end_idx,
2293 		       struct box *start_box,
2294 		       struct box *end_box,
2295 		       struct rect *bounds)
2296 {
2297 	/* get box position and jump to it */
2298 	box_coords(start_box, &bounds->x0, &bounds->y0);
2299 	/* \todo: move x0 in by correct idx */
2300 	box_coords(end_box, &bounds->x1, &bounds->y1);
2301 	/* \todo: move x1 in by correct idx */
2302 	bounds->x1 += end_box->width;
2303 	bounds->y1 += end_box->height;
2304 
2305 	return NSERROR_OK;
2306 }
2307 
2308 
2309 /**
2310  * HTML content handler function table
2311  */
2312 static const content_handler html_content_handler = {
2313 	.fini = html_fini,
2314 	.create = html_create,
2315 	.process_data = html_process_data,
2316 	.data_complete = html_convert,
2317 	.reformat = html_reformat,
2318 	.destroy = html_destroy,
2319 	.stop = html_stop,
2320 	.mouse_track = html_mouse_track,
2321 	.mouse_action = html_mouse_action,
2322 	.keypress = html_keypress,
2323 	.redraw = html_redraw,
2324 	.open = html_open,
2325 	.close = html_close,
2326 	.get_selection = html_get_selection,
2327 	.clear_selection = html_clear_selection,
2328 	.get_contextual_content = html_get_contextual_content,
2329 	.scroll_at_point = html_scroll_at_point,
2330 	.drop_file_at_point = html_drop_file_at_point,
2331 	.debug_dump = html_debug_dump,
2332 	.debug = html_debug,
2333 	.clone = html_clone,
2334 	.get_encoding = html_encoding,
2335 	.type = html_content_type,
2336 	.exec = html_exec,
2337 	.saw_insecure_objects = html_saw_insecure_objects,
2338 	.textsearch_find = html_textsearch_find,
2339 	.textsearch_bounds = html_textsearch_bounds,
2340 	.textselection_redraw = html_textselection_redraw,
2341 	.textselection_copy = html_textselection_copy,
2342 	.textselection_get_end = html_textselection_get_end,
2343 	.no_share = true,
2344 };
2345 
2346 
2347 /* exported function documented in html/html.h */
html_init(void)2348 nserror html_init(void)
2349 {
2350 	uint32_t i;
2351 	nserror error;
2352 
2353 	error = html_css_init();
2354 	if (error != NSERROR_OK)
2355 		goto error;
2356 
2357 	for (i = 0; i < NOF_ELEMENTS(html_types); i++) {
2358 		error = content_factory_register_handler(html_types[i],
2359 				&html_content_handler);
2360 		if (error != NSERROR_OK)
2361 			goto error;
2362 	}
2363 
2364 	return NSERROR_OK;
2365 
2366 error:
2367 	html_fini();
2368 
2369 	return error;
2370 }
2371