1 /*
2  * This file is part of Hubbub.
3  * Licensed under the MIT License,
4  *                http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
6  */
7 
8 #include <assert.h>
9 #include <string.h>
10 
11 #include "treebuilder/modes.h"
12 #include "treebuilder/internal.h"
13 #include "treebuilder/treebuilder.h"
14 #include "utils/utils.h"
15 
16 #undef DEBUG_IN_BODY
17 
/**
 * Bookmark for formatting list. Used in adoption agency
 *
 * Records a position as a pair of formatting list entry pointers.
 * NOTE(review): presumably these are the entries on either side of the
 * bookmarked slot -- confirm against the adoption agency helpers.
 */
typedef struct bookmark {
	formatting_list_entry *prev;	/**< Previous entry */
	formatting_list_entry *next;	/**< Next entry */
} bookmark;
25 
/* Per-token-type handlers, dispatched from handle_in_body() */
static hubbub_error process_character(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_start_tag(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_end_tag(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);

/* Start tag handlers, dispatched from process_start_tag() */
static hubbub_error process_html_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_body_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_frameset_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_container_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_hN_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_form_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token, element_type type);
static hubbub_error process_plaintext_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_a_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_presentational_in_body(
		hubbub_treebuilder *treebuilder,
		const hubbub_token *token, element_type type);
static hubbub_error process_nobr_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_button_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_applet_marquee_object_in_body(
		hubbub_treebuilder *treebuilder, const hubbub_token *token,
		element_type type);
static hubbub_error process_hr_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_image_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_isindex_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_textarea_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_select_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_opt_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);
static hubbub_error process_phrasing_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token);

/* End tag handlers (the "0" prefix), dispatched from process_end_tag().
 * Several are also invoked by start tag handlers to act as if the
 * corresponding end tag had been seen. */
static hubbub_error process_0body_in_body(hubbub_treebuilder *treebuilder);
static hubbub_error process_0container_in_body(hubbub_treebuilder *treebuilder,
		element_type type);
static hubbub_error process_0form_in_body(hubbub_treebuilder *treebuilder);
static hubbub_error process_0p_in_body(hubbub_treebuilder *treebuilder);
static hubbub_error process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
		element_type type);
static hubbub_error process_0h_in_body(hubbub_treebuilder *treebuilder,
		element_type type);
static hubbub_error process_0presentational_in_body(
		hubbub_treebuilder *treebuilder,
		element_type type);
static hubbub_error process_0applet_button_marquee_object_in_body(
		hubbub_treebuilder *treebuilder, element_type type);
static hubbub_error process_0br_in_body(hubbub_treebuilder *treebuilder);
static hubbub_error process_0generic_in_body(hubbub_treebuilder *treebuilder,
		element_type type);

/* Helpers with the aa_ prefix.
 * NOTE(review): presumably the individual steps of the adoption agency
 * algorithm (see the bookmark type) -- their definitions are not in view
 * here, confirm before relying on this. */
static hubbub_error aa_find_and_validate_formatting_element(
		hubbub_treebuilder *treebuilder, element_type type,
		formatting_list_entry **element);
static formatting_list_entry *aa_find_formatting_element(
		hubbub_treebuilder *treebuilder, element_type type);
static hubbub_error aa_find_furthest_block(hubbub_treebuilder *treebuilder,
		formatting_list_entry *formatting_element,
		uint32_t *furthest_block);
static hubbub_error aa_reparent_node(hubbub_treebuilder *treebuilder,
		void *node, void *new_parent, void **reparented);
static hubbub_error aa_find_bookmark_location_reparenting_misnested(
		hubbub_treebuilder *treebuilder,
		uint32_t formatting_element, uint32_t *furthest_block,
		bookmark *bookmark, uint32_t *last_node);
static hubbub_error aa_remove_element_stack_item(
		hubbub_treebuilder *treebuilder,
		uint32_t index, uint32_t limit);
static hubbub_error aa_clone_and_replace_entries(
		hubbub_treebuilder *treebuilder,
		formatting_list_entry *element);
114 
115 
116 /**
117  * Handle tokens in "in body" insertion mode
118  *
119  * \param treebuilder  The treebuilder instance
120  * \param token        The token to process
121  * \return True to reprocess the token, false otherwise
122  */
handle_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)123 hubbub_error handle_in_body(hubbub_treebuilder *treebuilder,
124 		const hubbub_token *token)
125 {
126 	hubbub_error err = HUBBUB_OK;
127 	uint32_t i;
128 
129 #if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
130 	fprintf(stdout, "Processing token %d\n", token->type);
131 	element_stack_dump(treebuilder, stdout);
132 	formatting_list_dump(treebuilder, stdout);
133 #endif
134 
135 	if (treebuilder->context.strip_leading_lr &&
136 			token->type != HUBBUB_TOKEN_CHARACTER) {
137 		/* Reset the LR stripping flag */
138 		treebuilder->context.strip_leading_lr = false;
139 	}
140 
141 	switch (token->type) {
142 	case HUBBUB_TOKEN_CHARACTER:
143 		err = process_character(treebuilder, token);
144 		break;
145 	case HUBBUB_TOKEN_COMMENT:
146 		err = process_comment_append(treebuilder, token,
147 				treebuilder->context.element_stack[
148 				treebuilder->context.current_node].node);
149 		break;
150 	case HUBBUB_TOKEN_DOCTYPE:
151 		/** \todo parse error */
152 		break;
153 	case HUBBUB_TOKEN_START_TAG:
154 		err = process_start_tag(treebuilder, token);
155 		break;
156 	case HUBBUB_TOKEN_END_TAG:
157 		err = process_end_tag(treebuilder, token);
158 		break;
159 	case HUBBUB_TOKEN_EOF:
160 		for (i = treebuilder->context.current_node;
161 				i > 0; i--) {
162 			element_type type =
163 				treebuilder->context.element_stack[i].type;
164 
165 			if (!(type == DD || type == DT || type == LI ||
166 					type == P || type == TBODY ||
167 					type == TD || type == TFOOT ||
168 					type == TH || type == THEAD ||
169 					type == TR || type == BODY)) {
170 				/** \todo parse error */
171 				break;
172 			}
173 		}
174 		break;
175 	}
176 
177 #if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
178 	fprintf(stdout, "Processed\n");
179 	element_stack_dump(treebuilder, stdout);
180 	formatting_list_dump(treebuilder, stdout);
181 #endif
182 
183 	return err;
184 }
185 
186 /**
187  * Process a character token
188  *
189  * \param treebuilder  The treebuilder instance
190  * \param token        The token to process
191  */
process_character(hubbub_treebuilder * treebuilder,const hubbub_token * token)192 hubbub_error process_character(hubbub_treebuilder *treebuilder,
193 		const hubbub_token *token)
194 {
195 	hubbub_error err = HUBBUB_OK;
196 	hubbub_string dummy = token->data.character;
197 	bool lr_flag = treebuilder->context.strip_leading_lr;
198 	const uint8_t *p;
199 
200 	err = reconstruct_active_formatting_list(treebuilder);
201 	if (err != HUBBUB_OK)
202 		return err;
203 
204 	if (treebuilder->context.strip_leading_lr) {
205 		const uint8_t *str = dummy.ptr;
206 
207 		if (*str == '\n') {
208 			dummy.ptr++;
209 			dummy.len--;
210 		}
211 
212 		treebuilder->context.strip_leading_lr = false;
213 	}
214 
215 	if (dummy.len) {
216 		err = append_text(treebuilder, &dummy);
217 		if (err != HUBBUB_OK) {
218 			/* Restore LR stripping flag */
219 			treebuilder->context.strip_leading_lr = lr_flag;
220 
221 			return err;
222 		}
223 	}
224 
225 	if (treebuilder->context.frameset_ok) {
226 		for (p = dummy.ptr; p < dummy.ptr + dummy.len; p++) {
227 			if (*p != 0x0009 && *p != 0x000a &&
228 					*p != 0x000c && *p != 0x0020) {
229 				treebuilder->context.frameset_ok = false;
230 				break;
231 			}
232 		}
233 	}
234 
235 	return HUBBUB_OK;
236 }
237 
238 /**
239  * Process a start tag
240  *
241  * \param treebuilder  The treebuilder instance
242  * \param token        The token to process
243  * \return HUBBUB_OK on success,
244  *         HUBBUB_REPROCESS to reprocess the token,
245  *         appropriate error otherwise.
246  */
process_start_tag(hubbub_treebuilder * treebuilder,const hubbub_token * token)247 hubbub_error process_start_tag(hubbub_treebuilder *treebuilder,
248 		const hubbub_token *token)
249 {
250 	hubbub_error err = HUBBUB_OK;
251 	element_type type = element_type_from_name(treebuilder,
252 			&token->data.tag.name);
253 
254 	if (type == HTML) {
255 		err = process_html_in_body(treebuilder, token);
256 	} else if (type == BASE || type == COMMAND || type == LINK ||
257 			type == META || type == NOFRAMES || type == SCRIPT ||
258 			type == STYLE || type == TITLE) {
259 		/* Process as "in head" */
260 		err = handle_in_head(treebuilder, token);
261 	} else if (type == BODY) {
262 		err = process_body_in_body(treebuilder, token);
263 	} else if (type == FRAMESET) {
264 		err = process_frameset_in_body(treebuilder, token);
265 	} else if (type == ADDRESS || type == ARTICLE || type == ASIDE ||
266 			type == BLOCKQUOTE || type == CENTER ||
267 			type == DATAGRID || type == DETAILS ||
268 			type == DIALOG || type == DIR ||
269 			type == DIV || type == DL || type == FIELDSET ||
270 			type == FIGCAPTION || type == FIGURE ||
271 			type == FOOTER || type == HEADER || type == MAIN ||
272 			type == MENU || type == NAV || type == OL ||
273 			type == P || type == SECTION || type == SUMMARY ||
274 			type == UL) {
275 		err = process_container_in_body(treebuilder, token);
276 	} else if (type == H1 || type == H2 || type == H3 ||
277 			type == H4 || type == H5 || type == H6) {
278 		err = process_hN_in_body(treebuilder, token);
279 	} else if (type == PRE || type == LISTING) {
280 		err = process_container_in_body(treebuilder, token);
281 
282 		if (err == HUBBUB_OK) {
283 			treebuilder->context.strip_leading_lr = true;
284 			treebuilder->context.frameset_ok = false;
285 		}
286 	} else if (type == FORM) {
287 		err = process_form_in_body(treebuilder, token);
288 	} else if (type == DD || type == DT || type == LI) {
289 		err = process_dd_dt_li_in_body(treebuilder, token, type);
290 	} else if (type == PLAINTEXT) {
291 		err = process_plaintext_in_body(treebuilder, token);
292 	} else if (type == A) {
293 		err = process_a_in_body(treebuilder, token);
294 	} else if (type == B || type == BIG || type == CODE || type == EM ||
295 			type == FONT || type == I || type == S ||
296 			type == SMALL || type == STRIKE ||
297 			type == STRONG || type == TT || type == U) {
298 		err = process_presentational_in_body(treebuilder,
299 				token, type);
300 	} else if (type == NOBR) {
301 		err = process_nobr_in_body(treebuilder, token);
302 	} else if (type == BUTTON) {
303 		err = process_button_in_body(treebuilder, token);
304 	} else if (type == APPLET || type == MARQUEE ||
305 			type == OBJECT) {
306 		err = process_applet_marquee_object_in_body(treebuilder,
307 				token, type);
308 	} else if (type == XMP) {
309 		err = reconstruct_active_formatting_list(treebuilder);
310 		if (err != HUBBUB_OK)
311 			return err;
312 
313 		treebuilder->context.frameset_ok = false;
314 
315 		err = parse_generic_rcdata(treebuilder, token, false);
316 	} else if (type == TABLE) {
317 		err = process_container_in_body(treebuilder, token);
318 		if (err == HUBBUB_OK) {
319 			treebuilder->context.frameset_ok = false;
320 
321 			treebuilder->context.element_stack[
322 				current_table(treebuilder)].tainted = false;
323 			treebuilder->context.mode = IN_TABLE;
324 		}
325 	} else if (type == AREA || type == BASEFONT ||
326 			type == BGSOUND || type == BR ||
327 			type == EMBED || type == IMG || type == INPUT ||
328 			type == PARAM || type == SPACER || type == WBR) {
329 		err = reconstruct_active_formatting_list(treebuilder);
330 		if (err != HUBBUB_OK)
331 			return err;
332 
333 		err = insert_element(treebuilder, &token->data.tag, false);
334 		if (err == HUBBUB_OK)
335 			treebuilder->context.frameset_ok = false;
336 	} else if (type == HR) {
337 		err = process_hr_in_body(treebuilder, token);
338 	} else if (type == IMAGE) {
339 		err = process_image_in_body(treebuilder, token);
340 	} else if (type == ISINDEX) {
341 		err = process_isindex_in_body(treebuilder, token);
342 	} else if (type == TEXTAREA) {
343 		err = process_textarea_in_body(treebuilder, token);
344 	} else if (type == IFRAME || type == NOEMBED ||
345 			type == NOFRAMES ||
346 			(treebuilder->context.enable_scripting &&
347 			type == NOSCRIPT)) {
348 		if (type == IFRAME)
349 			treebuilder->context.frameset_ok = false;
350 		err = parse_generic_rcdata(treebuilder, token, false);
351 	} else if (type == SELECT) {
352 		err = process_select_in_body(treebuilder, token);
353 		if (err != HUBBUB_OK)
354 			return err;
355 
356 		if (treebuilder->context.mode == IN_BODY) {
357 			treebuilder->context.mode = IN_SELECT;
358 		} else if (treebuilder->context.mode == IN_TABLE ||
359 				treebuilder->context.mode == IN_CAPTION ||
360 				treebuilder->context.mode == IN_COLUMN_GROUP ||
361 				treebuilder->context.mode == IN_TABLE_BODY ||
362 				treebuilder->context.mode == IN_ROW ||
363 				treebuilder->context.mode == IN_CELL) {
364 			treebuilder->context.mode = IN_SELECT_IN_TABLE;
365 		}
366 	} else if (type == OPTGROUP || type == OPTION) {
367 		err = process_opt_in_body(treebuilder, token);
368 	} else if (type == RP || type == RT) {
369 		/** \todo ruby */
370 	} else if (type == MATH || type == SVG) {
371 		hubbub_tag tag = token->data.tag;
372 
373 		err = reconstruct_active_formatting_list(treebuilder);
374 		if (err != HUBBUB_OK)
375 			return err;
376 
377 		adjust_foreign_attributes(treebuilder, &tag);
378 
379 		if (type == SVG) {
380 			adjust_svg_attributes(treebuilder, &tag);
381 			tag.ns = HUBBUB_NS_SVG;
382 		} else {
383 			adjust_mathml_attributes(treebuilder, &tag);
384 			tag.ns = HUBBUB_NS_MATHML;
385 		}
386 
387 		if (token->data.tag.self_closing) {
388 			err = insert_element(treebuilder, &tag, false);
389 			/** \todo ack sc flag */
390 		} else {
391 			err = insert_element(treebuilder, &tag, true);
392 			if (err == HUBBUB_OK) {
393 				treebuilder->context.second_mode =
394 						treebuilder->context.mode;
395 				treebuilder->context.mode = IN_FOREIGN_CONTENT;
396 			}
397 		}
398 	} else if (type == CAPTION || type == COL || type == COLGROUP ||
399 			type == FRAME || type == HEAD || type == TBODY ||
400 			type == TD || type == TFOOT || type == TH ||
401 			type == THEAD || type == TR) {
402 		/** \todo parse error */
403 	} else {
404 		err = process_phrasing_in_body(treebuilder, token);
405 	}
406 
407 	return err;
408 }
409 
410 /**
411  * Process an end tag
412  *
413  * \param treebuilder  The treebuilder instance
414  * \param token        The token to process
415  * \return True to reprocess the token
416  */
process_end_tag(hubbub_treebuilder * treebuilder,const hubbub_token * token)417 hubbub_error process_end_tag(hubbub_treebuilder *treebuilder,
418 		const hubbub_token *token)
419 {
420 	hubbub_error err = HUBBUB_OK;
421 	element_type type = element_type_from_name(treebuilder,
422 			&token->data.tag.name);
423 
424 	if (type == BODY) {
425 		err = process_0body_in_body(treebuilder);
426 		/* Never reprocess */
427 		if (err == HUBBUB_REPROCESS)
428 			err = HUBBUB_OK;
429 	} else if (type == HTML) {
430 		/* Act as if </body> has been seen then, if
431 		 * that wasn't ignored, reprocess this token */
432 		err = process_0body_in_body(treebuilder);
433 	} else if (type == ADDRESS || type == ARTICLE || type == ASIDE ||
434 			type == BLOCKQUOTE || type == CENTER ||
435 			type == DETAILS || type == DIALOG || type == DIR ||
436 			type == DATAGRID || type == DIV || type == DL ||
437 			type == FIELDSET || type == FIGCAPTION ||
438 			type == FIGURE || type == FOOTER || type == HEADER ||
439 			type == LISTING || type == MAIN|| type == MENU ||
440 			type == NAV || type == OL || type == PRE ||
441 			type == SECTION || type == SUMMARY || type == UL) {
442 		err = process_0container_in_body(treebuilder, type);
443 	} else if (type == FORM) {
444 		err = process_0form_in_body(treebuilder);
445 	} else if (type == P) {
446 		err = process_0p_in_body(treebuilder);
447 	} else if (type == DD || type == DT || type == LI) {
448 		err = process_0dd_dt_li_in_body(treebuilder, type);
449 	} else if (type == H1 || type == H2 || type == H3 ||
450 			type == H4 || type == H5 || type == H6) {
451 		err = process_0h_in_body(treebuilder, type);
452 	} else if (type == A || type == B || type == BIG || type == CODE ||
453 			type == EM || type == FONT || type == I ||
454 			type == NOBR || type == S || type == SMALL ||
455 			type == STRIKE || type == STRONG ||
456 			type == TT || type == U) {
457 		err = process_0presentational_in_body(treebuilder, type);
458 	} else if (type == APPLET || type == BUTTON ||
459 			type == MARQUEE || type == OBJECT) {
460 		err = process_0applet_button_marquee_object_in_body(
461 				treebuilder, type);
462 	} else if (type == BR) {
463 		err = process_0br_in_body(treebuilder);
464 	} else if (type == AREA || type == BASEFONT ||
465 			type == BGSOUND || type == EMBED ||
466 			type == HR || type == IFRAME ||
467 			type == IMAGE || type == IMG ||
468 			type == INPUT || type == ISINDEX ||
469 			type == NOEMBED || type == NOFRAMES ||
470 			type == PARAM || type == SELECT ||
471 			type == SPACER || type == TABLE ||
472 			type == TEXTAREA || type == WBR ||
473 			(treebuilder->context.enable_scripting &&
474 					type == NOSCRIPT)) {
475 		/** \todo parse error */
476 	} else {
477 		err = process_0generic_in_body(treebuilder, type);
478 	}
479 
480 	return err;
481 }
482 
483 /**
484  * Process a html start tag as if in "in body"
485  *
486  * \param treebuilder  The treebuilder instance
487  * \param token        The token to process
488  */
process_html_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)489 hubbub_error process_html_in_body(hubbub_treebuilder *treebuilder,
490 		const hubbub_token *token)
491 {
492 	/** \todo parse error */
493 
494 	return treebuilder->tree_handler->add_attributes(
495 			treebuilder->tree_handler->ctx,
496 			treebuilder->context.element_stack[0].node,
497 			token->data.tag.attributes,
498 			token->data.tag.n_attributes);
499 }
500 
501 /**
502  * Process a body start tag as if in "in body"
503  *
504  * \param treebuilder  The treebuilder instance
505  * \param token        The token to process
506  */
process_body_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)507 hubbub_error process_body_in_body(hubbub_treebuilder *treebuilder,
508 		const hubbub_token *token)
509 {
510 	/** \todo parse error */
511 
512 	if (treebuilder->context.current_node < 1 ||
513 			treebuilder->context.element_stack[1].type != BODY)
514 		return HUBBUB_OK;
515 
516 	return treebuilder->tree_handler->add_attributes(
517 			treebuilder->tree_handler->ctx,
518 			treebuilder->context.element_stack[1].node,
519 			token->data.tag.attributes,
520 			token->data.tag.n_attributes);
521 }
522 
523 /**
524  * Process a frameset start tag as if in "in body"
525  *
526  * \param treebuilder  The treebuilder instance
527  * \param token        The token to process
528  */
process_frameset_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)529 hubbub_error process_frameset_in_body(hubbub_treebuilder *treebuilder,
530 		const hubbub_token *token)
531 {
532 	hubbub_error err = HUBBUB_OK;
533 
534 	/** \todo parse error */
535 
536 	if (treebuilder->context.current_node < 1 ||
537 			treebuilder->context.element_stack[1].type != BODY)
538 		return HUBBUB_OK;
539 
540 	if (treebuilder->context.frameset_ok == false)
541 		return HUBBUB_OK;
542 
543 	err = remove_node_from_dom(treebuilder,
544 			treebuilder->context.element_stack[1].node);
545 	if (err != HUBBUB_OK)
546 		return err;
547 
548 	err = element_stack_pop_until(treebuilder, BODY);
549 	assert(err == HUBBUB_OK);
550 
551 	err = insert_element(treebuilder, &token->data.tag, true);
552 	if (err == HUBBUB_OK)
553 		treebuilder->context.mode = IN_FRAMESET;
554 
555 	return err;
556 }
557 
558 /**
559  * Process a generic container start tag as if in "in body"
560  *
561  * \param treebuilder  The treebuilder instance
562  * \param token        The token to process
563  */
process_container_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)564 hubbub_error process_container_in_body(hubbub_treebuilder *treebuilder,
565 		const hubbub_token *token)
566 {
567 	hubbub_error err;
568 
569 	if (element_in_scope(treebuilder, P, false)) {
570 		err = process_0p_in_body(treebuilder);
571 		if (err != HUBBUB_OK)
572 			return err;
573 	}
574 
575 	return insert_element(treebuilder, &token->data.tag, true);
576 }
577 
578 /**
579  * Process a hN start tag as if in "in body"
580  *
581  * \param treebuilder  The treebuilder instance
582  * \param token        The token to process
583  */
process_hN_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)584 hubbub_error process_hN_in_body(hubbub_treebuilder *treebuilder,
585 		const hubbub_token *token)
586 {
587 	hubbub_error err;
588 	element_type type;
589 
590 	if (element_in_scope(treebuilder, P, false)) {
591 		err = process_0p_in_body(treebuilder);
592 		if (err != HUBBUB_OK)
593 			return err;
594 	}
595 
596 	type = treebuilder->context.element_stack[
597 			treebuilder->context.current_node].type;
598 
599 	if (type == H1 || type == H2 || type == H3 || type == H4 ||
600 			type == H5 || type == H6) {
601 		hubbub_ns ns;
602 		element_type otype;
603 		void *node;
604 
605 		/** \todo parse error */
606 
607 		err = element_stack_pop(treebuilder, &ns, &otype, &node);
608 		assert(err == HUBBUB_OK);
609 
610 		treebuilder->tree_handler->unref_node(
611 				treebuilder->tree_handler->ctx,
612 				node);
613 	}
614 
615 	return insert_element(treebuilder, &token->data.tag, true);
616 }
617 
618 /**
619  * Process a form start tag as if in "in body"
620  *
621  * \param treebuilder  The treebuilder instance
622  * \param token        The token to process
623  */
process_form_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)624 hubbub_error process_form_in_body(hubbub_treebuilder *treebuilder,
625 		const hubbub_token *token)
626 {
627 	hubbub_error err;
628 
629 	if (treebuilder->context.form_element != NULL) {
630 		/** \todo parse error */
631 	} else {
632 		if (element_in_scope(treebuilder, P, false)) {
633 			err = process_0p_in_body(treebuilder);
634 			if (err != HUBBUB_OK)
635 				return err;
636 		}
637 
638 		err = insert_element(treebuilder, &token->data.tag, true);
639 		if (err != HUBBUB_OK)
640 			return err;
641 
642 		/* Claim a reference on the node and
643 		 * use it as the current form element */
644 		treebuilder->tree_handler->ref_node(
645 			treebuilder->tree_handler->ctx,
646 			treebuilder->context.element_stack[
647 			treebuilder->context.current_node].node);
648 
649 		treebuilder->context.form_element =
650 			treebuilder->context.element_stack[
651 			treebuilder->context.current_node].node;
652 	}
653 
654 	return HUBBUB_OK;
655 }
656 
657 /**
658  * Process a dd, dt or li start tag as if in "in body"
659  *
660  * \param treebuilder  The treebuilder instance
661  * \param token        The token to process
662  * \param type         The element type
663  */
process_dd_dt_li_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token,element_type type)664 hubbub_error process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
665 		const hubbub_token *token, element_type type)
666 {
667 	hubbub_error err;
668 	element_context *stack = treebuilder->context.element_stack;
669 	uint32_t node;
670 
671 	treebuilder->context.frameset_ok = false;
672 
673 	if (element_in_scope(treebuilder, P, false)) {
674 		err = process_0p_in_body(treebuilder);
675 		if (err != HUBBUB_OK)
676 			return err;
677 	}
678 
679 	/* Find last LI/(DD,DT) on stack, if any */
680 	for (node = treebuilder->context.current_node; node > 0; node--) {
681 		element_type ntype = stack[node].type;
682 
683 		if (type == LI && ntype == LI)
684 			break;
685 
686 		if (((type == DD || type == DT) &&
687 				(ntype == DD || ntype == DT)))
688 			break;
689 
690 		if (!is_formatting_element(ntype) &&
691 				!is_phrasing_element(ntype) &&
692 				ntype != ADDRESS &&
693 				ntype != DIV)
694 			break;
695 	}
696 
697 	/* If we found one, then pop all nodes up to and including it */
698 	if (stack[node].type == LI || stack[node].type == DD ||
699 			stack[node].type == DT) {
700 		/* Check that we're only popping one node
701 		 * and emit a parse error if not */
702 		if (treebuilder->context.current_node > node) {
703 			/** \todo parse error */
704 		}
705 
706 		do {
707 			hubbub_ns ns;
708 			element_type otype;
709 			void *node;
710 
711 			err = element_stack_pop(treebuilder, &ns,
712 					&otype, &node);
713 			assert(err == HUBBUB_OK);
714 
715 			treebuilder->tree_handler->unref_node(
716 					treebuilder->tree_handler->ctx,
717 					node);
718 		} while (treebuilder->context.current_node >= node);
719 	}
720 
721 	return insert_element(treebuilder, &token->data.tag, true);
722 }
723 
724 /**
725  * Process a plaintext start tag as if in "in body"
726  *
727  * \param treebuilder  The treebuilder instance
728  * \param token        The token to process
729  */
process_plaintext_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)730 hubbub_error process_plaintext_in_body(hubbub_treebuilder *treebuilder,
731 		const hubbub_token *token)
732 {
733 	hubbub_error err;
734 	hubbub_tokeniser_optparams params;
735 
736 	if (element_in_scope(treebuilder, P, false)) {
737 		err = process_0p_in_body(treebuilder);
738 		if (err != HUBBUB_OK)
739 			return err;
740 	}
741 
742 	err = insert_element(treebuilder, &token->data.tag, true);
743 	if (err != HUBBUB_OK)
744 		return err;
745 
746 	params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT;
747 
748 	err = hubbub_tokeniser_setopt(treebuilder->tokeniser,
749 			HUBBUB_TOKENISER_CONTENT_MODEL,
750 			&params);
751 	assert(err == HUBBUB_OK);
752 
753 	return HUBBUB_OK;
754 }
755 
756 /**
757  * Process an "a" start tag as if in "in body"
758  *
759  * \param treebuilder  The treebuilder instance
760  * \param token        The token to process
761  */
process_a_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)762 hubbub_error process_a_in_body(hubbub_treebuilder *treebuilder,
763 		const hubbub_token *token)
764 {
765 	hubbub_error err;
766 	formatting_list_entry *entry =
767 			aa_find_formatting_element(treebuilder, A);
768 
769 	if (entry != NULL) {
770 		uint32_t index = entry->stack_index;
771 		void *node = entry->details.node;
772 		formatting_list_entry *entry2;
773 
774 		/** \todo parse error */
775 
776 		/* Act as if </a> were seen */
777 		err = process_0presentational_in_body(treebuilder, A);
778 		if (err != HUBBUB_OK)
779 			return err;
780 
781 		entry2 = aa_find_formatting_element(treebuilder, A);
782 
783 		/* Remove from formatting list, if it's still there */
784 		if (entry2 == entry && entry2->details.node == node) {
785 			hubbub_ns ons;
786 			element_type otype;
787 			void *onode;
788 			uint32_t oindex;
789 
790 			err = formatting_list_remove(treebuilder, entry,
791 					&ons, &otype, &onode, &oindex);
792 			assert(err == HUBBUB_OK);
793 
794 			treebuilder->tree_handler->unref_node(
795 					treebuilder->tree_handler->ctx, onode);
796 
797 		}
798 
799 		/* Remove from the stack of open elements, if still there */
800 		if (index <= treebuilder->context.current_node &&
801 				treebuilder->context.element_stack[index].node
802 				== node) {
803 			hubbub_ns ns;
804 			element_type otype;
805 			void *onode;
806 
807 			err = element_stack_remove(treebuilder, index, &ns,
808 					&otype,	&onode);
809 			assert(err == HUBBUB_OK);
810 
811 			treebuilder->tree_handler->unref_node(
812 					treebuilder->tree_handler->ctx, onode);
813 		}
814 	}
815 
816 	err = reconstruct_active_formatting_list(treebuilder);
817 	if (err != HUBBUB_OK)
818 		return err;
819 
820 	err = insert_element(treebuilder, &token->data.tag, true);
821 	if (err != HUBBUB_OK)
822 		return err;
823 
824 	treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx,
825 		treebuilder->context.element_stack[
826 		treebuilder->context.current_node].node);
827 
828 	err = formatting_list_append(treebuilder, token->data.tag.ns, A,
829 		treebuilder->context.element_stack[
830 			treebuilder->context.current_node].node,
831 		treebuilder->context.current_node);
832 	if (err != HUBBUB_OK) {
833 		hubbub_ns ns;
834 		element_type type;
835 		void *node;
836 
837 		remove_node_from_dom(treebuilder,
838 				treebuilder->context.element_stack[
839 				treebuilder->context.current_node].node);
840 
841 		element_stack_pop(treebuilder, &ns, &type, &node);
842 
843 		/* Unref twice (once for stack, once for formatting list) */
844 		treebuilder->tree_handler->unref_node(
845 				treebuilder->tree_handler->ctx, node);
846 
847 		treebuilder->tree_handler->unref_node(
848 				treebuilder->tree_handler->ctx, node);
849 
850 		return err;
851 	}
852 
853 	return HUBBUB_OK;
854 }
855 
856 /**
857  * Process a b, big, em, font, i, s, small,
858  * strike, strong, tt, or u start tag as if in "in body"
859  *
860  * \param treebuilder  The treebuilder instance
861  * \param token        The token to process
862  * \param type         The element type
863  */
process_presentational_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token,element_type type)864 hubbub_error process_presentational_in_body(hubbub_treebuilder *treebuilder,
865 		const hubbub_token *token, element_type type)
866 {
867 	hubbub_error err;
868 
869 	err = reconstruct_active_formatting_list(treebuilder);
870 	if (err != HUBBUB_OK)
871 		return err;
872 
873 	err = insert_element(treebuilder, &token->data.tag, true);
874 	if (err != HUBBUB_OK)
875 		return err;
876 
877 	treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx,
878 		treebuilder->context.element_stack[
879 		treebuilder->context.current_node].node);
880 
881 	err = formatting_list_append(treebuilder, token->data.tag.ns, type,
882 		treebuilder->context.element_stack[
883 		treebuilder->context.current_node].node,
884 		treebuilder->context.current_node);
885 	if (err != HUBBUB_OK) {
886 		hubbub_ns ns;
887 		element_type type;
888 		void *node;
889 
890 		remove_node_from_dom(treebuilder,
891 				treebuilder->context.element_stack[
892 				treebuilder->context.current_node].node);
893 
894 		element_stack_pop(treebuilder, &ns, &type, &node);
895 
896 		/* Unref twice (once for stack, once for formatting list) */
897 		treebuilder->tree_handler->unref_node(
898 				treebuilder->tree_handler->ctx, node);
899 
900 		treebuilder->tree_handler->unref_node(
901 				treebuilder->tree_handler->ctx, node);
902 
903 		return err;
904 	}
905 
906 	return HUBBUB_OK;
907 }
908 
909 /**
910  * Process a nobr start tag as if in "in body"
911  *
912  * \param treebuilder  The treebuilder instance
913  * \param token        The token to process
914  */
process_nobr_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)915 hubbub_error process_nobr_in_body(hubbub_treebuilder *treebuilder,
916 		const hubbub_token *token)
917 {
918 	hubbub_error err;
919 
920 	err = reconstruct_active_formatting_list(treebuilder);
921 	if (err != HUBBUB_OK)
922 		return err;
923 
924 	if (element_in_scope(treebuilder, NOBR, false)) {
925 		/** \todo parse error */
926 
927 		/* Act as if </nobr> were seen */
928 		err = process_0presentational_in_body(treebuilder, NOBR);
929 		if (err != HUBBUB_OK)
930 			return err;
931 
932 		/* Yes, again */
933 		err = reconstruct_active_formatting_list(treebuilder);
934 		if (err != HUBBUB_OK)
935 			return err;
936 	}
937 
938 	err = insert_element(treebuilder, &token->data.tag, true);
939 	if (err != HUBBUB_OK)
940 		return err;
941 
942 	treebuilder->tree_handler->ref_node(
943 		treebuilder->tree_handler->ctx,
944 		treebuilder->context.element_stack[
945 		treebuilder->context.current_node].node);
946 
947 	err = formatting_list_append(treebuilder, token->data.tag.ns, NOBR,
948 		treebuilder->context.element_stack[
949 		treebuilder->context.current_node].node,
950 		treebuilder->context.current_node);
951 	if (err != HUBBUB_OK) {
952 		hubbub_ns ns;
953 		element_type type;
954 		void *node;
955 
956 		remove_node_from_dom(treebuilder,
957 				treebuilder->context.element_stack[
958 				treebuilder->context.current_node].node);
959 
960 		element_stack_pop(treebuilder, &ns, &type, &node);
961 
962 		/* Unref twice (once for stack, once for formatting list) */
963 		treebuilder->tree_handler->unref_node(
964 				treebuilder->tree_handler->ctx, node);
965 
966 		treebuilder->tree_handler->unref_node(
967 				treebuilder->tree_handler->ctx, node);
968 
969 		return err;
970 	}
971 
972 	return HUBBUB_OK;
973 }
974 
975 /**
976  * Process a button start tag as if in "in body"
977  *
978  * \param treebuilder  The treebuilder instance
979  * \param token        The token to process
980  */
process_button_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)981 hubbub_error process_button_in_body(hubbub_treebuilder *treebuilder,
982 		const hubbub_token *token)
983 {
984 	hubbub_error err;
985 
986 	if (element_in_scope(treebuilder, BUTTON, false)) {
987 		/** \todo parse error */
988 
989 		/* Act as if </button> has been seen */
990 		err = process_0applet_button_marquee_object_in_body(
991 				treebuilder, BUTTON);
992 		assert(err == HUBBUB_OK);
993 	}
994 
995 	err = reconstruct_active_formatting_list(treebuilder);
996 	if (err != HUBBUB_OK)
997 		return err;
998 
999 	err = insert_element(treebuilder, &token->data.tag, true);
1000 	if (err != HUBBUB_OK)
1001 		return err;
1002 
1003 	treebuilder->tree_handler->ref_node(
1004 		treebuilder->tree_handler->ctx,
1005 		treebuilder->context.element_stack[
1006 		treebuilder->context.current_node].node);
1007 
1008 	err = formatting_list_append(treebuilder, token->data.tag.ns, BUTTON,
1009 		treebuilder->context.element_stack[
1010 		treebuilder->context.current_node].node,
1011 		treebuilder->context.current_node);
1012 	if (err != HUBBUB_OK) {
1013 		hubbub_ns ns;
1014 		element_type type;
1015 		void *node;
1016 
1017 		remove_node_from_dom(treebuilder,
1018 				treebuilder->context.element_stack[
1019 				treebuilder->context.current_node].node);
1020 
1021 		element_stack_pop(treebuilder, &ns, &type, &node);
1022 
1023 		/* Unref twice (once for stack, once for formatting list) */
1024 		treebuilder->tree_handler->unref_node(
1025 				treebuilder->tree_handler->ctx, node);
1026 
1027 		treebuilder->tree_handler->unref_node(
1028 				treebuilder->tree_handler->ctx, node);
1029 
1030 		return err;
1031 	}
1032 
1033 	treebuilder->context.frameset_ok = false;
1034 
1035 	return HUBBUB_OK;
1036 }
1037 
1038 /**
1039  * Process an applet, marquee or object start tag as if in "in body"
1040  *
1041  * \param treebuilder  The treebuilder instance
1042  * \param token        The token to process
1043  * \param type         The element type
1044  */
process_applet_marquee_object_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token,element_type type)1045 hubbub_error process_applet_marquee_object_in_body(
1046 		hubbub_treebuilder *treebuilder,
1047 		const hubbub_token *token, element_type type)
1048 {
1049 	hubbub_error err;
1050 
1051 	err = reconstruct_active_formatting_list(treebuilder);
1052 	if (err != HUBBUB_OK)
1053 		return err;
1054 
1055 	err = insert_element(treebuilder, &token->data.tag, true);
1056 	if (err != HUBBUB_OK)
1057 		return err;
1058 
1059 	treebuilder->tree_handler->ref_node(
1060 		treebuilder->tree_handler->ctx,
1061 		treebuilder->context.element_stack[
1062 		treebuilder->context.current_node].node);
1063 
1064 	err = formatting_list_append(treebuilder, token->data.tag.ns, type,
1065 		treebuilder->context.element_stack[
1066 		treebuilder->context.current_node].node,
1067 		treebuilder->context.current_node);
1068 	if (err != HUBBUB_OK) {
1069 		hubbub_ns ns;
1070 		element_type type;
1071 		void *node;
1072 
1073 		remove_node_from_dom(treebuilder,
1074 				treebuilder->context.element_stack[
1075 				treebuilder->context.current_node].node);
1076 
1077 		element_stack_pop(treebuilder, &ns, &type, &node);
1078 
1079 		/* Unref twice (once for stack, once for formatting list) */
1080 		treebuilder->tree_handler->unref_node(
1081 				treebuilder->tree_handler->ctx, node);
1082 
1083 		treebuilder->tree_handler->unref_node(
1084 				treebuilder->tree_handler->ctx, node);
1085 
1086 		return err;
1087 	}
1088 
1089 	treebuilder->context.frameset_ok = false;
1090 
1091 	return HUBBUB_OK;
1092 }
1093 
1094 /**
1095  * Process an hr start tag as if in "in body"
1096  *
1097  * \param treebuilder  The treebuilder instance
1098  * \param token        The token to process
1099  */
process_hr_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1100 hubbub_error process_hr_in_body(hubbub_treebuilder *treebuilder,
1101 		const hubbub_token *token)
1102 {
1103 	hubbub_error err;
1104 
1105 	if (element_in_scope(treebuilder, P, false)) {
1106 		err = process_0p_in_body(treebuilder);
1107 		if (err != HUBBUB_OK)
1108 			return err;
1109 	}
1110 
1111 	err = insert_element(treebuilder, &token->data.tag, false);
1112 	if (err == HUBBUB_OK)
1113 		treebuilder->context.frameset_ok = false;
1114 
1115 	return err;
1116 }
1117 
1118 /**
1119  * Process an image start tag as if in "in body"
1120  *
1121  * \param treebuilder  The treebuilder instance
1122  * \param token        The token to process
1123  */
process_image_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1124 hubbub_error process_image_in_body(hubbub_treebuilder *treebuilder,
1125 		const hubbub_token *token)
1126 {
1127 	hubbub_error err;
1128 	hubbub_tag tag;
1129 
1130 	tag.ns = HUBBUB_NS_HTML;
1131 	tag.name.ptr = (const uint8_t *) "img";
1132 	tag.name.len = SLEN("img");
1133 
1134 	tag.n_attributes = token->data.tag.n_attributes;
1135 	tag.attributes = token->data.tag.attributes;
1136 
1137 	err = reconstruct_active_formatting_list(treebuilder);
1138 	if (err != HUBBUB_OK)
1139 		return err;
1140 
1141 	return insert_element(treebuilder, &tag, false);
1142 }
1143 
1144 /**
1145  * Process an isindex start tag as if in "in body"
1146  *
1147  * \param treebuilder  The treebuilder instance
1148  * \param token        The token to process
1149  */
process_isindex_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1150 hubbub_error process_isindex_in_body(hubbub_treebuilder *treebuilder,
1151 		const hubbub_token *token)
1152 {
1153 	hubbub_error err;
1154 	hubbub_token dummy;
1155 	hubbub_attribute *action = NULL;
1156 	hubbub_attribute *prompt = NULL;
1157 	hubbub_attribute *attrs = NULL;
1158 	size_t n_attrs = 0;
1159 
1160 	/** \todo parse error */
1161 
1162 	if (treebuilder->context.form_element != NULL)
1163 		return HUBBUB_OK;
1164 
1165 	/* First up, clone the token's attributes */
1166 	if (token->data.tag.n_attributes > 0) {
1167 		uint32_t i;
1168 		attrs = malloc((token->data.tag.n_attributes + 1) *
1169 						sizeof(hubbub_attribute));
1170 		if (attrs == NULL)
1171 			return HUBBUB_NOMEM;
1172 
1173 		for (i = 0; i < token->data.tag.n_attributes; i++) {
1174 			hubbub_attribute *attr = &token->data.tag.attributes[i];
1175 			const uint8_t *name = attr->name.ptr;
1176 
1177 			if (strncmp((const char *) name, "action",
1178 					attr->name.len) == 0) {
1179 				action = attr;
1180 			} else if (strncmp((const char *) name, "prompt",
1181 					attr->name.len) == 0) {
1182 				prompt = attr;
1183 			} else if (strncmp((const char *) name, "name",
1184 					attr->name.len) == 0) {
1185 			} else {
1186 				attrs[n_attrs++] = *attr;
1187 			}
1188 		}
1189 
1190 		attrs[n_attrs].ns = HUBBUB_NS_HTML;
1191 		attrs[n_attrs].name.ptr = (const uint8_t *) "name";
1192 		attrs[n_attrs].name.len = SLEN("name");
1193 		attrs[n_attrs].value.ptr = (const uint8_t *) "isindex";
1194 		attrs[n_attrs].value.len = SLEN("isindex");
1195 		n_attrs++;
1196 	}
1197 
1198 	/* isindex algorithm */
1199 
1200 	/* Set up dummy as a start tag token */
1201 	dummy.type = HUBBUB_TOKEN_START_TAG;
1202 	dummy.data.tag.ns = HUBBUB_NS_HTML;
1203 
1204 	/* Act as if <form> were seen */
1205 	dummy.data.tag.name.ptr = (const uint8_t *) "form";
1206 	dummy.data.tag.name.len = SLEN("form");
1207 
1208 	dummy.data.tag.n_attributes = action != NULL ? 1 : 0;
1209 	dummy.data.tag.attributes = action;
1210 
1211 	err = process_form_in_body(treebuilder, &dummy);
1212 	if (err != HUBBUB_OK) {
1213 		free(attrs);
1214 		return err;
1215 	}
1216 
1217 	/* Act as if <hr> were seen */
1218 	dummy.data.tag.name.ptr = (const uint8_t *) "hr";
1219 	dummy.data.tag.name.len = SLEN("hr");
1220 	dummy.data.tag.n_attributes = 0;
1221 	dummy.data.tag.attributes = NULL;
1222 
1223 	err = process_hr_in_body(treebuilder, &dummy);
1224 	if (err != HUBBUB_OK) {
1225 		free(attrs);
1226 		return err;
1227 	}
1228 
1229 	/* Act as if <p> were seen */
1230 	dummy.data.tag.name.ptr = (const uint8_t *) "p";
1231 	dummy.data.tag.name.len = SLEN("p");
1232 	dummy.data.tag.n_attributes = 0;
1233 	dummy.data.tag.attributes = NULL;
1234 
1235 	err = process_container_in_body(treebuilder, &dummy);
1236 	if (err != HUBBUB_OK) {
1237 		free(attrs);
1238 		return err;
1239 	}
1240 
1241 	/* Act as if <label> were seen */
1242 	dummy.data.tag.name.ptr = (const uint8_t *) "label";
1243 	dummy.data.tag.name.len = SLEN("label");
1244 	dummy.data.tag.n_attributes = 0;
1245 	dummy.data.tag.attributes = NULL;
1246 
1247 	err = process_phrasing_in_body(treebuilder, &dummy);
1248 	if (err != HUBBUB_OK) {
1249 		free(attrs);
1250 		return err;
1251 	}
1252 
1253 	/* Act as if a stream of characters were seen */
1254 	dummy.type = HUBBUB_TOKEN_CHARACTER;
1255 	if (prompt != NULL) {
1256 		dummy.data.character = prompt->value;
1257 	} else {
1258 		/** \todo Localisation */
1259 #define PROMPT "This is a searchable index. Insert your search keywords here: "
1260 		dummy.data.character.ptr = (const uint8_t *) PROMPT;
1261 		dummy.data.character.len = SLEN(PROMPT);
1262 #undef PROMPT
1263 	}
1264 
1265 	err = process_character(treebuilder, &dummy);
1266 	if (err != HUBBUB_OK) {
1267 		free(attrs);
1268 		return err;
1269 	}
1270 
1271 	/* Act as if <input> was seen */
1272 	dummy.type = HUBBUB_TOKEN_START_TAG;
1273 	dummy.data.tag.ns = HUBBUB_NS_HTML;
1274 	dummy.data.tag.name.ptr = (const uint8_t *) "input";
1275 	dummy.data.tag.name.len = SLEN("input");
1276 
1277 	dummy.data.tag.n_attributes = n_attrs;
1278 	dummy.data.tag.attributes = attrs;
1279 
1280 	err = reconstruct_active_formatting_list(treebuilder);
1281 	if (err != HUBBUB_OK) {
1282 		free(attrs);
1283 		return err;
1284 	}
1285 
1286 	err = insert_element(treebuilder, &dummy.data.tag, false);
1287 	if (err != HUBBUB_OK) {
1288 		free(attrs);
1289 		return err;
1290 	}
1291 
1292 	/* No longer need attrs */
1293 	free(attrs);
1294 
1295 	treebuilder->context.frameset_ok = false;
1296 
1297 	/* Act as if </label> was seen */
1298 	err = process_0generic_in_body(treebuilder, LABEL);
1299 	assert(err == HUBBUB_OK);
1300 
1301 	/* Act as if </p> was seen */
1302 	err = process_0p_in_body(treebuilder);
1303 	if (err != HUBBUB_OK)
1304 		return err;
1305 
1306 	/* Act as if <hr> was seen */
1307 	dummy.data.tag.name.ptr = (const uint8_t *) "hr";
1308 	dummy.data.tag.name.len = SLEN("hr");
1309 	dummy.data.tag.n_attributes = 0;
1310 	dummy.data.tag.attributes = NULL;
1311 
1312 	err = process_hr_in_body(treebuilder, &dummy);
1313 	if (err != HUBBUB_OK)
1314 		return err;
1315 
1316 	/* Act as if </form> was seen */
1317 	return process_0container_in_body(treebuilder, FORM);
1318 }
1319 
1320 /**
1321  * Process a textarea start tag as if in "in body"
1322  *
1323  * \param treebuilder  The treebuilder instance
1324  * \param token        The token to process
1325  */
process_textarea_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1326 hubbub_error process_textarea_in_body(hubbub_treebuilder *treebuilder,
1327 		const hubbub_token *token)
1328 {
1329 	treebuilder->context.strip_leading_lr = true;
1330 	treebuilder->context.frameset_ok = false;
1331 	return parse_generic_rcdata(treebuilder, token, true);
1332 }
1333 
1334 /**
1335  * Process a select start tag as if in "in body"
1336  *
1337  * \param treebuilder  The treebuilder instance
1338  * \param token        The token to process
1339  */
process_select_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1340 hubbub_error process_select_in_body(hubbub_treebuilder *treebuilder,
1341 		const hubbub_token *token)
1342 {
1343 	hubbub_error err;
1344 
1345 	err = reconstruct_active_formatting_list(treebuilder);
1346 	if (err != HUBBUB_OK)
1347 		return err;
1348 
1349 	err = insert_element(treebuilder, &token->data.tag, true);
1350 	if (err == HUBBUB_OK)
1351 		treebuilder->context.frameset_ok = false;
1352 
1353 	return err;
1354 }
1355 
1356 /**
1357  * Process an option or optgroup start tag as if in "in body"
1358  *
1359  * \param treebuilder  The treebuilder instance
1360  * \param token        The token to process
1361  */
process_opt_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1362 hubbub_error process_opt_in_body(hubbub_treebuilder *treebuilder,
1363 		const hubbub_token *token)
1364 {
1365 	hubbub_error err;
1366 
1367 	if (element_in_scope(treebuilder, OPTION, false)) {
1368 		err = process_0generic_in_body(treebuilder, OPTION);
1369 		/* Cannot fail */
1370 		assert(err == HUBBUB_OK);
1371 	}
1372 
1373 	err = reconstruct_active_formatting_list(treebuilder);
1374 	if (err != HUBBUB_OK)
1375 		return err;
1376 
1377 	return insert_element(treebuilder, &token->data.tag, true);
1378 }
1379 
1380 /**
1381  * Process a phrasing start tag as if in "in body"
1382  *
1383  * \param treebuilder  The treebuilder instance
1384  * \param token        The token to process
1385  */
process_phrasing_in_body(hubbub_treebuilder * treebuilder,const hubbub_token * token)1386 hubbub_error process_phrasing_in_body(hubbub_treebuilder *treebuilder,
1387 		const hubbub_token *token)
1388 {
1389 	hubbub_error err;
1390 
1391 	err = reconstruct_active_formatting_list(treebuilder);
1392 	if (err != HUBBUB_OK)
1393 		return err;
1394 
1395 	return insert_element(treebuilder, &token->data.tag, true);
1396 }
1397 
1398 /**
1399  * Process a body end tag as if in "in body"
1400  *
1401  * \param treebuilder  The treebuilder instance
1402  * \return True if processed, false otherwise
1403  */
process_0body_in_body(hubbub_treebuilder * treebuilder)1404 hubbub_error process_0body_in_body(hubbub_treebuilder *treebuilder)
1405 {
1406 	hubbub_error err = HUBBUB_OK;
1407 
1408 	if (!element_in_scope(treebuilder, BODY, false)) {
1409 		/** \todo parse error */
1410 	} else {
1411 		element_context *stack = treebuilder->context.element_stack;
1412 		uint32_t node;
1413 
1414 		for (node = treebuilder->context.current_node;
1415 				node > 0; node--) {
1416 			element_type ntype = stack[node].type;
1417 
1418 			if (ntype != DD && ntype != DT && ntype != LI &&
1419 					ntype != OPTGROUP && ntype != OPTION &&
1420 					ntype != P && ntype != RP &&
1421 					ntype != RT && ntype != TBODY &&
1422 					ntype != TD && ntype != TFOOT &&
1423 					ntype != TH && ntype != THEAD &&
1424 					ntype != TR && ntype != BODY) {
1425 				/** \todo parse error */
1426 			}
1427 		}
1428 
1429 		if (treebuilder->context.mode == IN_BODY)
1430 			treebuilder->context.mode = AFTER_BODY;
1431 
1432 		err = HUBBUB_REPROCESS;
1433 	}
1434 
1435 	return err;
1436 }
1437 
1438 /**
1439  * Process a container end tag as if in "in body"
1440  *
1441  * \param treebuilder  The treebuilder instance
1442  * \param type         The element type
1443  */
process_0container_in_body(hubbub_treebuilder * treebuilder,element_type type)1444 hubbub_error process_0container_in_body(hubbub_treebuilder *treebuilder,
1445 		element_type type)
1446 {
1447 	if (!element_in_scope(treebuilder, type, false)) {
1448 		/** \todo parse error */
1449 	} else {
1450 		uint32_t popped = 0;
1451 		element_type otype;
1452 
1453 		close_implied_end_tags(treebuilder, UNKNOWN);
1454 
1455 		do {
1456 			hubbub_ns ns;
1457 			void *node;
1458 
1459 			element_stack_pop(treebuilder, &ns, &otype, &node);
1460 
1461 			treebuilder->tree_handler->unref_node(
1462 					treebuilder->tree_handler->ctx,
1463 					node);
1464 
1465 			popped++;
1466 		} while (otype != type);
1467 
1468 		if (popped > 1) {
1469 			/** \todo parse error */
1470 		}
1471 	}
1472 
1473 	return HUBBUB_OK;
1474 }
1475 
1476 /**
1477  * Process a form end tag as if in "in body"
1478  *
1479  * \param treebuilder  The treebuilder instance
1480  */
process_0form_in_body(hubbub_treebuilder * treebuilder)1481 hubbub_error process_0form_in_body(hubbub_treebuilder *treebuilder)
1482 {
1483 	void *node = treebuilder->context.form_element;
1484 	uint32_t idx = 0;
1485 
1486 	if (treebuilder->context.form_element != NULL)
1487 		treebuilder->tree_handler->unref_node(
1488 				treebuilder->tree_handler->ctx,
1489 				treebuilder->context.form_element);
1490 	treebuilder->context.form_element = NULL;
1491 
1492 	idx = element_in_scope(treebuilder, FORM, false);
1493 
1494 	if (idx == 0 || node == NULL ||
1495 			treebuilder->context.element_stack[idx].node != node) {
1496 		/** \todo parse error */
1497 	} else {
1498 		hubbub_ns ns;
1499 		element_type otype;
1500 		void *onode;
1501 
1502 		close_implied_end_tags(treebuilder, UNKNOWN);
1503 
1504 		if (treebuilder->context.element_stack[
1505 				treebuilder->context.current_node].node !=
1506 				node) {
1507 			/** \todo parse error */
1508 		}
1509 
1510 		element_stack_remove(treebuilder, idx,
1511 				&ns, &otype, &onode);
1512 
1513 		treebuilder->tree_handler->unref_node(
1514 				treebuilder->tree_handler->ctx,
1515 				onode);
1516 	}
1517 
1518 	return HUBBUB_OK;
1519 }
1520 
1521 
1522 /**
1523  * Process a p end tag as if in "in body"
1524  *
1525  * \param treebuilder  The treebuilder instance
1526  */
process_0p_in_body(hubbub_treebuilder * treebuilder)1527 hubbub_error process_0p_in_body(hubbub_treebuilder *treebuilder)
1528 {
1529 	hubbub_error err = HUBBUB_OK;
1530 	uint32_t popped = 0;
1531 
1532 	if (treebuilder->context.element_stack[
1533 			treebuilder->context.current_node].type != P) {
1534 		/** \todo parse error */
1535 	}
1536 
1537 	while (element_in_scope(treebuilder, P, false)) {
1538 		hubbub_ns ns;
1539 		element_type type;
1540 		void *node;
1541 
1542 		err = element_stack_pop(treebuilder, &ns, &type, &node);
1543 		assert(err == HUBBUB_OK);
1544 
1545 		treebuilder->tree_handler->unref_node(
1546 				treebuilder->tree_handler->ctx, node);
1547 
1548 		popped++;
1549 	}
1550 
1551 	if (popped == 0) {
1552 		hubbub_token dummy;
1553 
1554 		dummy.type = HUBBUB_TOKEN_START_TAG;
1555 		dummy.data.tag.ns = HUBBUB_NS_HTML;
1556 		dummy.data.tag.name.ptr = (const uint8_t *) "p";
1557 		dummy.data.tag.name.len = SLEN("p");
1558 		dummy.data.tag.n_attributes = 0;
1559 		dummy.data.tag.attributes = NULL;
1560 
1561 		err = process_container_in_body(treebuilder, &dummy);
1562 		if (err != HUBBUB_OK)
1563 			return err;
1564 
1565 		/* Reprocess the end tag. This is safe as we've just
1566 		 * inserted a <p> into the current scope */
1567 		err = process_0p_in_body(treebuilder);
1568 		/* Cannot fail */
1569 		assert(err == HUBBUB_OK);
1570 	}
1571 
1572 	return err;
1573 }
1574 
1575 /**
1576  * Process a dd, dt, or li end tag as if in "in body"
1577  *
1578  * \param treebuilder  The treebuilder instance
1579  * \param type         The element type
1580  */
process_0dd_dt_li_in_body(hubbub_treebuilder * treebuilder,element_type type)1581 hubbub_error process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
1582 		element_type type)
1583 {
1584 	if (!element_in_scope(treebuilder, type, false)) {
1585 		/** \todo parse error */
1586 	} else {
1587 		uint32_t popped = 0;
1588 		element_type otype;
1589 
1590 		close_implied_end_tags(treebuilder, type);
1591 
1592 		do {
1593 			hubbub_ns ns;
1594 			void *node;
1595 
1596 			element_stack_pop(treebuilder,
1597 					&ns, &otype, &node);
1598 
1599 			treebuilder->tree_handler->unref_node(
1600 					treebuilder->tree_handler->ctx,
1601 					node);
1602 
1603 			popped++;
1604 		} while (otype != type);
1605 
1606 		if (popped > 1) {
1607 			/** \todo parse error */
1608 		}
1609 	}
1610 
1611 	return HUBBUB_OK;
1612 }
1613 
1614 /**
1615  * Process a h1, h2, h3, h4, h5, or h6 end tag as if in "in body"
1616  *
1617  * \param treebuilder  The treebuilder instance
1618  * \param type         The element type
1619  */
process_0h_in_body(hubbub_treebuilder * treebuilder,element_type type)1620 hubbub_error process_0h_in_body(hubbub_treebuilder *treebuilder,
1621 		element_type type)
1622 {
1623 	UNUSED(type);
1624 
1625 	/** \todo optimise this */
1626 	if (element_in_scope(treebuilder, H1, false) ||
1627 			element_in_scope(treebuilder, H2, false) ||
1628 			element_in_scope(treebuilder, H3, false) ||
1629 			element_in_scope(treebuilder, H4, false) ||
1630 			element_in_scope(treebuilder, H5, false) ||
1631 			element_in_scope(treebuilder, H6, false)) {
1632 		uint32_t popped = 0;
1633 		element_type otype;
1634 
1635 		close_implied_end_tags(treebuilder, UNKNOWN);
1636 
1637 		do {
1638 			hubbub_ns ns;
1639 			void *node;
1640 
1641 			element_stack_pop(treebuilder, &ns, &otype, &node);
1642 
1643 			treebuilder->tree_handler->unref_node(
1644 					treebuilder->tree_handler->ctx,
1645 					node);
1646 
1647 			popped++;
1648 		} while (otype != H1 && otype != H2 &&
1649 				otype != H3 && otype != H4 &&
1650 				otype != H5 && otype != H6);
1651 
1652 		if (popped > 1) {
1653 			/** \todo parse error */
1654 		}
1655 	} else {
1656 		/** \todo parse error */
1657 	}
1658 
1659 	return HUBBUB_OK;
1660 }
1661 
/**
 * Process a presentational end tag as if in "in body"
 *
 * This drives the "adoption agency" procedure. Each pass of the loop runs
 * the numbered steps below; the loop has no exit condition of its own and
 * only ends by returning. The two search helpers (steps 1 and 2 & 3)
 * return HUBBUB_REPROCESS to carry on and HUBBUB_OK to stop, so a
 * HUBBUB_OK from either of them ends processing successfully.
 *
 * \param treebuilder  The treebuilder instance
 * \param type         The element type
 * \return HUBBUB_OK once a search step reports there is nothing more to do,
 *         otherwise the error reported by the failing step
 */
hubbub_error process_0presentational_in_body(hubbub_treebuilder *treebuilder,
		element_type type)
{
	hubbub_error err;

	/* Welcome to the adoption agency */

	while (true) {
		/* NOTE(review): the stack pointer is sampled once per pass;
		 * this assumes none of the helpers called below reallocate
		 * the element stack -- confirm */
		element_context *stack = treebuilder->context.element_stack;

		formatting_list_entry *entry;
		uint32_t formatting_element;
		uint32_t common_ancestor;
		uint32_t furthest_block;
		bookmark bookmark;
		uint32_t last_node;
		void *reparented;
		void *fe_clone = NULL;
		void *clone_appended = NULL;
		hubbub_ns ons;
		element_type otype;
		void *onode;
		uint32_t oindex;

		/* 1: find the formatting list entry for this element type.
		 * HUBBUB_OK means stop; HUBBUB_REPROCESS means entry is set */
		err = aa_find_and_validate_formatting_element(treebuilder,
				type, &entry);
		assert(err == HUBBUB_OK || err == HUBBUB_REPROCESS);
		if (err == HUBBUB_OK)
			return err;

		assert(entry->details.type == type);

		/* Take a copy of the stack index for use
		 * during stack manipulation */
		formatting_element = entry->stack_index;

		/* 2 & 3: find the furthest block. HUBBUB_OK means stop;
		 * HUBBUB_REPROCESS means furthest_block is set */
		err = aa_find_furthest_block(treebuilder,
				entry, &furthest_block);
		assert(err == HUBBUB_OK || err == HUBBUB_REPROCESS);
		if (err == HUBBUB_OK)
			return err;

		/* 4: the common ancestor is the stack entry immediately
		 * below the formatting element */
		common_ancestor = formatting_element - 1;

		/* 5: remember the formatting element's neighbours in the
		 * formatting list */
		bookmark.prev = entry->prev;
		bookmark.next = entry->next;

		/* 6: may update furthest_block and bookmark; yields last_node */
		err = aa_find_bookmark_location_reparenting_misnested(
				treebuilder, formatting_element,
				&furthest_block, &bookmark, &last_node);
		if (err != HUBBUB_OK)
			return err;

		/* 7: attach last_node -- through the foster parent helper if
		 * the common ancestor is a table, tbody, tfoot, thead or tr,
		 * otherwise directly under the common ancestor */
		if (stack[common_ancestor].type == TABLE ||
				stack[common_ancestor].type == TBODY ||
				stack[common_ancestor].type == TFOOT ||
				stack[common_ancestor].type == THEAD ||
				stack[common_ancestor].type == TR) {
			err = aa_insert_into_foster_parent(treebuilder,
					stack[last_node].node, &reparented);
		} else {
			err = aa_reparent_node(treebuilder,
					stack[last_node].node,
					stack[common_ancestor].node,
					&reparented);
		}
		if (err != HUBBUB_OK)
			return err;

		treebuilder->tree_handler->unref_node(
				treebuilder->tree_handler->ctx,
				stack[last_node].node);

		/* If the reparented node is not the same as the one we were
		 * previously using, then have it take the place of the other
		 * one in the formatting list and stack. */
		if (reparented != stack[last_node].node) {
			struct formatting_list_entry *node_entry;
			/* Search the formatting list from its end for the
			 * entry that refers to last_node's stack slot */
			for (node_entry = treebuilder->context.formatting_list_end;
					node_entry != NULL;
					node_entry = node_entry->prev) {
				if (node_entry->stack_index == last_node) {
					treebuilder->tree_handler->ref_node(
						treebuilder->tree_handler->ctx,
						reparented);
					node_entry->details.node = reparented;
					treebuilder->tree_handler->unref_node(
						treebuilder->tree_handler->ctx,
						stack[last_node].node);
					break;
				}
			}
			/* Already have enough references, so don't need to
			 * explicitly reference it here. */
			stack[last_node].node = reparented;
		}

		/* 8: shallow-clone the formatting element's node (the
		 * third argument requests a non-deep clone -- presumably;
		 * confirm against the tree handler contract) */
		err = treebuilder->tree_handler->clone_node(
				treebuilder->tree_handler->ctx,
				entry->details.node, false, &fe_clone);
		if (err != HUBBUB_OK)
			return err;

		/* 9: move the furthest block's children onto the clone;
		 * the clone is released if this fails */
		err = treebuilder->tree_handler->reparent_children(
				treebuilder->tree_handler->ctx,
				stack[furthest_block].node, fe_clone);
		if (err != HUBBUB_OK) {
			treebuilder->tree_handler->unref_node(
					treebuilder->tree_handler->ctx,
					fe_clone);
			return err;
		}

		/* 10: append the clone to the furthest block; the clone is
		 * released if this fails */
		err = treebuilder->tree_handler->append_child(
				treebuilder->tree_handler->ctx,
				stack[furthest_block].node, fe_clone,
				&clone_appended);
		if (err != HUBBUB_OK) {
			treebuilder->tree_handler->unref_node(
					treebuilder->tree_handler->ctx,
					fe_clone);
			return err;
		}

		/* append_child may hand back a different node from the one
		 * passed in; from here on only clone_appended is used */
		if (clone_appended != fe_clone) {
			/* No longer interested in fe_clone */
			treebuilder->tree_handler->unref_node(
					treebuilder->tree_handler->ctx,
					fe_clone);
			/* Need an extra reference, as we'll insert into the
			 * formatting list and element stack */
			treebuilder->tree_handler->ref_node(
					treebuilder->tree_handler->ctx,
					clone_appended);
		}

		/* 11 and 12 are reversed here so that we know the correct
		 * stack index to use when inserting into the formatting list */

		/* 12 */
		err = aa_remove_element_stack_item(treebuilder,
				formatting_element, furthest_block);
		assert(err == HUBBUB_OK);

		/* Fix up furthest block index */
		furthest_block--;

		/* Now, in the gap after furthest block,
		 * we insert an entry for clone */
		/* NOTE(review): only type and node are written to this stack
		 * slot; any other fields keep whatever the slot previously
		 * held -- confirm that is intended */
		stack[furthest_block + 1].type = entry->details.type;
		stack[furthest_block + 1].node = clone_appended;

		/* 11: take the old entry out of the formatting list and
		 * release the node reference it held */
		err = formatting_list_remove(treebuilder, entry,
				&ons, &otype, &onode, &oindex);
		assert(err == HUBBUB_OK);

		treebuilder->tree_handler->unref_node(
				treebuilder->tree_handler->ctx,	onode);

		/* Insert an entry for the clone between the bookmarked
		 * neighbours, pointing at its new stack slot */
		err = formatting_list_insert(treebuilder,
				bookmark.prev, bookmark.next,
				ons, otype, clone_appended, furthest_block + 1);
		if (err != HUBBUB_OK) {
			treebuilder->tree_handler->unref_node(
					treebuilder->tree_handler->ctx,
					clone_appended);
			return err;
		}

		/* 13: go round again */
	}
}
1850 
1851 /**
1852  * Adoption agency: find and validate the formatting element
1853  *
1854  * \param treebuilder  The treebuilder instance
1855  * \param type         Element type to search for
1856  * \param element      Pointer to location to receive list entry
1857  * \return HUBBUB_REPROCESS to continue processing,
1858  *         HUBBUB_OK to stop.
1859  */
aa_find_and_validate_formatting_element(hubbub_treebuilder * treebuilder,element_type type,formatting_list_entry ** element)1860 hubbub_error aa_find_and_validate_formatting_element(
1861 		hubbub_treebuilder *treebuilder,
1862 		element_type type, formatting_list_entry **element)
1863 {
1864 	formatting_list_entry *entry;
1865 
1866 	entry = aa_find_formatting_element(treebuilder, type);
1867 
1868 	if (entry == NULL || (entry->stack_index != 0 &&
1869 			element_in_scope(treebuilder, entry->details.type,
1870 					false) != entry->stack_index)) {
1871 		/** \todo parse error */
1872 		return HUBBUB_OK;
1873 	}
1874 
1875 	if (entry->stack_index == 0) {
1876 		/* Not in element stack => remove from formatting list */
1877 		hubbub_ns ns;
1878 		element_type type;
1879 		void *node;
1880 		uint32_t index;
1881 
1882 		/** \todo parse error */
1883 
1884 		formatting_list_remove(treebuilder, entry,
1885 				&ns, &type, &node, &index);
1886 
1887 		treebuilder->tree_handler->unref_node(
1888 				treebuilder->tree_handler->ctx, node);
1889 
1890 		return HUBBUB_OK;
1891 	}
1892 
1893 	if (entry->stack_index != treebuilder->context.current_node) {
1894 		/** \todo parse error */
1895 	}
1896 
1897 	*element = entry;
1898 
1899 	return HUBBUB_REPROCESS;
1900 }
1901 
1902 /**
1903  * Adoption agency: find formatting element
1904  *
1905  * \param treebuilder  The treebuilder instance
1906  * \param type         Type of element to search for
1907  * \return Pointer to formatting element, or NULL if none found
1908  */
aa_find_formatting_element(hubbub_treebuilder * treebuilder,element_type type)1909 formatting_list_entry *aa_find_formatting_element(
1910 		hubbub_treebuilder *treebuilder, element_type type)
1911 {
1912 	formatting_list_entry *entry;
1913 
1914 	for (entry = treebuilder->context.formatting_list_end;
1915 			entry != NULL; entry = entry->prev) {
1916 
1917 		/* Assumption: HTML and TABLE elements are not in the list */
1918 		if (is_scoping_element(entry->details.type) ||
1919 				entry->details.type == type)
1920 			break;
1921 	}
1922 
1923 	/* Check if we stopped on a marker, rather than a formatting element */
1924 	if (entry != NULL && is_scoping_element(entry->details.type))
1925 		entry = NULL;
1926 
1927 	return entry;
1928 }
1929 
1930 /**
1931  * Adoption agency: find furthest block
1932  *
1933  * \param treebuilder         The treebuilder instance
1934  * \param formatting_element  The formatting element
1935  * \param furthest_block      Pointer to location to receive furthest block
1936  * \return HUBBUB_REPROCESS to continue processing (::furthest_block filled in),
1937  *         HUBBUB_OK to stop.
1938  */
aa_find_furthest_block(hubbub_treebuilder * treebuilder,formatting_list_entry * formatting_element,uint32_t * furthest_block)1939 hubbub_error aa_find_furthest_block(hubbub_treebuilder *treebuilder,
1940 		formatting_list_entry *formatting_element,
1941 		uint32_t *furthest_block)
1942 {
1943 	uint32_t fe_index = formatting_element->stack_index;
1944 	uint32_t fb;
1945 
1946 	for (fb = fe_index + 1; fb <= treebuilder->context.current_node; fb++) {
1947 		element_type type = treebuilder->context.element_stack[fb].type;
1948 
1949 		if (!(is_phrasing_element(type) || is_formatting_element(type)))
1950 			break;
1951 	}
1952 
1953 	if (fb > treebuilder->context.current_node) {
1954 		hubbub_ns ns;
1955 		element_type type;
1956 		void *node;
1957 		uint32_t index;
1958 
1959 		/* Pop all elements off the stack up to,
1960 		 * and including, the formatting element */
1961 		do {
1962 			element_stack_pop(treebuilder, &ns, &type, &node);
1963 
1964 			treebuilder->tree_handler->unref_node(
1965 					treebuilder->tree_handler->ctx,
1966 					node);
1967 		} while (treebuilder->context.current_node >= fe_index);
1968 
1969 		/* Remove the formatting element from the list */
1970 		formatting_list_remove(treebuilder, formatting_element,
1971 				&ns, &type, &node, &index);
1972 
1973 		treebuilder->tree_handler->unref_node(
1974 				treebuilder->tree_handler->ctx, node);
1975 
1976 		return HUBBUB_OK;
1977 	}
1978 
1979 	*furthest_block = fb;
1980 
1981 	return HUBBUB_REPROCESS;
1982 }
1983 
1984 /**
1985  * Adoption agency: reparent a node
1986  *
1987  * \param treebuilder  The treebuilder instance
1988  * \param node         The node to reparent
1989  * \param new_parent   The new parent
1990  * \param reparented   Pointer to location to receive reparented node
1991  * \return HUBBUB_OK on success, appropriate error otherwise
1992  */
aa_reparent_node(hubbub_treebuilder * treebuilder,void * node,void * new_parent,void ** reparented)1993 hubbub_error aa_reparent_node(hubbub_treebuilder *treebuilder, void *node,
1994 		void *new_parent, void **reparented)
1995 {
1996 	hubbub_error err;
1997 
1998 	err = remove_node_from_dom(treebuilder, node);
1999 	if (err != HUBBUB_OK)
2000 		return err;
2001 
2002 	return treebuilder->tree_handler->append_child(
2003 			treebuilder->tree_handler->ctx,
2004 			new_parent, node, reparented);
2005 }
2006 
2007 /**
2008  * Adoption agency: this is step 6
2009  *
2010  * \param treebuilder         The treebuilder instance
2011  * \param formatting_element  The stack index of the formatting element
2012  * \param furthest_block      Pointer to index of furthest block in element
2013  *                            stack (updated on exit)
2014  * \param bookmark            Pointer to bookmark (pre-initialised)
2015  * \param last_node           Pointer to location to receive index of last node
2016  */
aa_find_bookmark_location_reparenting_misnested(hubbub_treebuilder * treebuilder,uint32_t formatting_element,uint32_t * furthest_block,bookmark * bookmark,uint32_t * last_node)2017 hubbub_error aa_find_bookmark_location_reparenting_misnested(
2018 		hubbub_treebuilder *treebuilder,
2019 		uint32_t formatting_element, uint32_t *furthest_block,
2020 		bookmark *bookmark, uint32_t *last_node)
2021 {
2022 	hubbub_error err;
2023 	element_context *stack = treebuilder->context.element_stack;
2024 	uint32_t node, last, fb;
2025 	formatting_list_entry *node_entry;
2026 
2027 	node = last = fb = *furthest_block;
2028 
2029 	while (true) {
2030 		void *reparented;
2031 
2032 		/* i */
2033 		node--;
2034 
2035 		/* ii */
2036 		for (node_entry = treebuilder->context.formatting_list_end;
2037 				node_entry != NULL;
2038 				node_entry = node_entry->prev) {
2039 			if (node_entry->stack_index == node)
2040 				break;
2041 		}
2042 
2043 		/* Node is not in list of active formatting elements */
2044 		if (node_entry == NULL) {
2045 			err = aa_remove_element_stack_item(treebuilder,
2046 				node, treebuilder->context.current_node);
2047 			assert(err == HUBBUB_OK);
2048 
2049 			/* Update furthest block index and the last node index,
2050 			 * as these are always below node in the stack */
2051 			fb--;
2052 			last--;
2053 
2054 			/* Fixup the current_node index */
2055 			treebuilder->context.current_node--;
2056 
2057 			/* Back to i */
2058 			continue;
2059 		}
2060 
2061 		/* iii */
2062 		if (node == formatting_element)
2063 			break;
2064 
2065 		/* iv */
2066 		if (last == fb) {
2067 			bookmark->prev = node_entry;
2068 			bookmark->next = node_entry->next;
2069 		}
2070 
2071 		/* v */
2072 		err = aa_clone_and_replace_entries(treebuilder, node_entry);
2073 		if (err != HUBBUB_OK)
2074 			return err;
2075 
2076 		/* vi */
2077 		err = aa_reparent_node(treebuilder, stack[last].node,
2078 				stack[node].node, &reparented);
2079 		if (err != HUBBUB_OK)
2080 			return err;
2081 
2082 		treebuilder->tree_handler->unref_node(
2083 				treebuilder->tree_handler->ctx,
2084 				stack[last].node);
2085 
2086 		/* If the reparented node is not the same as the one we were
2087 		 * previously using, then have it take the place of the other
2088 		 * one in the formatting list and stack. */
2089 		if (reparented != stack[last].node) {
2090 			for (node_entry =
2091 				treebuilder->context.formatting_list_end;
2092 					node_entry != NULL;
2093 					node_entry = node_entry->prev) {
2094 				if (node_entry->stack_index == last) {
2095 					treebuilder->tree_handler->ref_node(
2096 						treebuilder->tree_handler->ctx,
2097 						reparented);
2098 					node_entry->details.node = reparented;
2099 					treebuilder->tree_handler->unref_node(
2100 						treebuilder->tree_handler->ctx,
2101 						stack[last].node);
2102 					break;
2103 				}
2104 			}
2105 			/* Already have enough references, so don't need to
2106 			 * explicitly reference it here. */
2107 			stack[last].node = reparented;
2108 		}
2109 
2110 		/* vii */
2111 		last = node;
2112 
2113 		/* viii */
2114 	}
2115 
2116 	*furthest_block = fb;
2117 	*last_node = last;
2118 
2119 	return HUBBUB_OK;
2120 }
2121 
2122 /**
2123  * Adoption agency: remove an entry from the stack at the given index
2124  *
2125  * \param treebuilder  The treebuilder instance
2126  * \param index        The index of the item to remove
2127  * \param limit        The index of the last item to move
2128  *
2129  * Preconditions: index < limit, limit <= current_node
2130  * Postcondition: stack[limit] is empty
2131  */
aa_remove_element_stack_item(hubbub_treebuilder * treebuilder,uint32_t index,uint32_t limit)2132 hubbub_error aa_remove_element_stack_item(hubbub_treebuilder *treebuilder,
2133 		uint32_t index, uint32_t limit)
2134 {
2135 	element_context *stack = treebuilder->context.element_stack;
2136 	uint32_t n;
2137 
2138 	assert(index < limit);
2139 	assert(limit <= treebuilder->context.current_node);
2140 
2141 	/* First, scan over subsequent entries in the stack,
2142 	 * searching for them in the list of active formatting
2143 	 * entries. If found, update the corresponding
2144 	 * formatting list entry's stack index to match the
2145 	 * new stack location */
2146 	for (n = index + 1; n <= limit; n++) {
2147 		if (is_formatting_element(stack[n].type) ||
2148 				(is_scoping_element(stack[n].type) &&
2149 				stack[n].type != HTML &&
2150 				stack[n].type != TABLE)) {
2151 			formatting_list_entry *e;
2152 
2153 			for (e = treebuilder->context.formatting_list_end;
2154 					e != NULL; e = e->prev) {
2155 				if (e->stack_index == n)
2156 					e->stack_index--;
2157 			}
2158 		}
2159 	}
2160 
2161 	/* Reduce node's reference count */
2162 	treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
2163 					stack[index].node);
2164 
2165 	/* Now, shuffle the stack up one, removing node in the process */
2166 	memmove(&stack[index], &stack[index + 1],
2167 			(limit - index) * sizeof(element_context));
2168 
2169 	return HUBBUB_OK;
2170 }
2171 
2172 /**
2173  * Adoption agency: shallow clone a node and replace its formatting list
2174  * and element stack entries
2175  *
2176  * \param treebuilder  The treebuilder instance
2177  * \param element      The item in the formatting list containing the node
2178  */
aa_clone_and_replace_entries(hubbub_treebuilder * treebuilder,formatting_list_entry * element)2179 hubbub_error aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder,
2180 		formatting_list_entry *element)
2181 {
2182 	hubbub_error err;
2183 	hubbub_ns ons;
2184 	element_type otype;
2185 	uint32_t oindex;
2186 	void *clone, *onode;
2187 
2188 	/* Shallow clone of node */
2189 	err = treebuilder->tree_handler->clone_node(
2190 			treebuilder->tree_handler->ctx,
2191 			element->details.node, false, &clone);
2192 	if (err != HUBBUB_OK)
2193 		return err;
2194 
2195 	/* Replace formatting list entry for node with clone */
2196 	err = formatting_list_replace(treebuilder, element,
2197 			element->details.ns, element->details.type,
2198 			clone, element->stack_index,
2199 			&ons, &otype, &onode, &oindex);
2200 	assert(err == HUBBUB_OK);
2201 
2202 	treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
2203 			onode);
2204 
2205 	treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx,
2206 			clone);
2207 
2208 	/* Replace node's stack entry with clone */
2209 	treebuilder->context.element_stack[element->stack_index].node = clone;
2210 
2211 	treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
2212 			onode);
2213 
2214 	return HUBBUB_OK;
2215 }
2216 
2217 /**
2218  * Adoption agency: locate foster parent and insert node into it
2219  *
2220  * \param treebuilder  The treebuilder instance
2221  * \param node         The node to insert
2222  * \param inserted     Pointer to location to receive inserted node
2223  * \return HUBBUB_OK on success, appropriate error otherwise
2224  */
aa_insert_into_foster_parent(hubbub_treebuilder * treebuilder,void * node,void ** inserted)2225 hubbub_error aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder,
2226 		void *node, void **inserted)
2227 {
2228 	hubbub_error err;
2229 	element_context *stack = treebuilder->context.element_stack;
2230 	void *foster_parent = NULL;
2231 	bool insert = false;
2232 
2233 	uint32_t cur_table = current_table(treebuilder);
2234 
2235 	stack[cur_table].tainted = true;
2236 
2237 	if (cur_table == 0) {
2238 		treebuilder->tree_handler->ref_node(
2239 				treebuilder->tree_handler->ctx,
2240 				stack[0].node);
2241 
2242 		foster_parent = stack[0].node;
2243 	} else {
2244 		void *t_parent = NULL;
2245 
2246 		treebuilder->tree_handler->get_parent(
2247 			treebuilder->tree_handler->ctx,
2248 			stack[cur_table].node,
2249 			true, &t_parent);
2250 
2251 		if (t_parent != NULL) {
2252 			foster_parent = t_parent;
2253 			insert = true;
2254 		} else {
2255 			treebuilder->tree_handler->ref_node(
2256 					treebuilder->tree_handler->ctx,
2257 					stack[cur_table - 1].node);
2258 			foster_parent = stack[cur_table - 1].node;
2259 		}
2260 	}
2261 
2262 	err = remove_node_from_dom(treebuilder, node);
2263 	if (err != HUBBUB_OK) {
2264 		treebuilder->tree_handler->unref_node(
2265 				treebuilder->tree_handler->ctx,
2266 				foster_parent);
2267 		return err;
2268 	}
2269 
2270 	if (insert) {
2271 		err = treebuilder->tree_handler->insert_before(
2272 				treebuilder->tree_handler->ctx,
2273 				foster_parent, node,
2274 				stack[cur_table].node,
2275 				inserted);
2276 	} else {
2277 		err = treebuilder->tree_handler->append_child(
2278 				treebuilder->tree_handler->ctx,
2279 				foster_parent, node,
2280 				inserted);
2281 	}
2282 	if (err != HUBBUB_OK) {
2283 		treebuilder->tree_handler->unref_node(
2284 				treebuilder->tree_handler->ctx,
2285 				foster_parent);
2286 		return err;
2287 	}
2288 
2289 	treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
2290 			foster_parent);
2291 
2292 	return HUBBUB_OK;
2293 }
2294 
2295 
2296 /**
2297  * Process an applet, button, marquee, or object end tag as if in "in body"
2298  *
2299  * \param treebuilder  The treebuilder instance
2300  * \param type         The element type
2301  */
process_0applet_button_marquee_object_in_body(hubbub_treebuilder * treebuilder,element_type type)2302 hubbub_error process_0applet_button_marquee_object_in_body(
2303 		hubbub_treebuilder *treebuilder, element_type type)
2304 {
2305 	if (!element_in_scope(treebuilder, type, false)) {
2306 		/** \todo parse error */
2307 	} else {
2308 		uint32_t popped = 0;
2309 		element_type otype;
2310 
2311 		close_implied_end_tags(treebuilder, UNKNOWN);
2312 
2313 		do {
2314 			hubbub_ns ns;
2315 			void *node;
2316 
2317 			element_stack_pop(treebuilder, &ns, &otype, &node);
2318 
2319 			treebuilder->tree_handler->unref_node(
2320 					treebuilder->tree_handler->ctx,
2321 					node);
2322 
2323 			popped++;
2324 		} while (otype != type);
2325 
2326 		if (popped > 1) {
2327 			/** \todo parse error */
2328 		}
2329 
2330 		clear_active_formatting_list_to_marker(treebuilder);
2331 	}
2332 
2333 	return HUBBUB_OK;
2334 }
2335 
2336 /**
2337  * Process a br end tag as if in "in body"
2338  *
2339  * \param treebuilder  The treebuilder instance
2340  */
process_0br_in_body(hubbub_treebuilder * treebuilder)2341 hubbub_error process_0br_in_body(hubbub_treebuilder *treebuilder)
2342 {
2343 	hubbub_error err;
2344 	hubbub_tag tag;
2345 
2346 	/** \todo parse error */
2347 
2348 	/* Act as if <br> has been seen. */
2349 
2350 	tag.ns = HUBBUB_NS_HTML;
2351 	tag.name.ptr = (const uint8_t *) "br";
2352 	tag.name.len = SLEN("br");
2353 
2354 	tag.n_attributes = 0;
2355 	tag.attributes = NULL;
2356 
2357 	err = reconstruct_active_formatting_list(treebuilder);
2358 	if (err != HUBBUB_OK)
2359 		return err;
2360 
2361 	return insert_element(treebuilder, &tag, false);
2362 }
2363 
2364 /**
2365  * Process a generic end tag as if in "in body"
2366  *
2367  * \param treebuilder  The treebuilder instance
2368  * \param type         The element type
2369  */
process_0generic_in_body(hubbub_treebuilder * treebuilder,element_type type)2370 hubbub_error process_0generic_in_body(hubbub_treebuilder *treebuilder,
2371 		element_type type)
2372 {
2373 	element_context *stack = treebuilder->context.element_stack;
2374 	uint32_t node = treebuilder->context.current_node;
2375 
2376 	do {
2377 		if (stack[node].type == type) {
2378 			uint32_t popped = 0;
2379 			element_type otype;
2380 
2381 			close_implied_end_tags(treebuilder, UNKNOWN);
2382 
2383 			while (treebuilder->context.current_node >= node) {
2384 				hubbub_ns ns;
2385 				void *node;
2386 
2387 				element_stack_pop(treebuilder,
2388 						&ns, &otype, &node);
2389 
2390 				treebuilder->tree_handler->unref_node(
2391 						treebuilder->tree_handler->ctx,
2392 						node);
2393 
2394 				popped++;
2395 
2396 				if (otype == type)
2397 					break;
2398 			}
2399 
2400 			if (popped > 1) {
2401 				/** \todo parse error */
2402 			}
2403 
2404 			break;
2405 		} else if (!is_formatting_element(stack[node].type) &&
2406 				!is_phrasing_element(stack[node].type)) {
2407 			/** \todo parse error */
2408 			break;
2409 		}
2410 	} while (--node > 0);
2411 
2412 	return HUBBUB_OK;
2413 }
2414 
2415