1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 #![allow(warnings)]
11
12 //! The HTML5 tree builder.
13
14 pub use interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink};
15 pub use interface::{AppendNode, AppendText, Attribute, NodeOrText};
16 pub use interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
17
18 use self::types::*;
19
20 use tendril::StrTendril;
21 use {ExpandedName, LocalName, Namespace, QualName};
22
23 use tokenizer;
24 use tokenizer::states as tok_state;
25 use tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};
26
27 use util::str::is_ascii_whitespace;
28
29 use std::borrow::Cow::Borrowed;
30 use std::collections::VecDeque;
31 use std::default::Default;
32 use std::iter::{Enumerate, Rev};
33 use std::mem::replace;
34 use std::{fmt, slice};
35
36 use log::Level;
37 use tokenizer::states::{RawData, RawKind};
38 use tree_builder::tag_sets::*;
39 use tree_builder::types::*;
40 use util::str::to_escaped_string;
41
42 pub use self::PushFlag::*;
43
44 #[macro_use]
45 mod tag_sets;
46
47 mod data;
48 mod types;
49
50 include!(concat!(env!("OUT_DIR"), "/rules.rs"));
51
/// Tree builder options, with an impl for Default.
///
/// The struct is `Copy`; both `TreeBuilder` constructors take it by value.
#[derive(Copy, Clone)]
pub struct TreeBuilderOpts {
    /// Report all parse errors described in the spec, at some
    /// performance penalty?  Default: false
    pub exact_errors: bool,

    /// Is scripting enabled?  Default: true
    pub scripting_enabled: bool,

    /// Is this an `iframe srcdoc` document?  Default: false
    pub iframe_srcdoc: bool,

    /// Should we drop the DOCTYPE (if any) from the tree?  Default: false
    pub drop_doctype: bool,

    /// Obsolete, ignored.  Default: false
    pub ignore_missing_rules: bool,

    /// Initial TreeBuilder quirks mode. Default: NoQuirks
    pub quirks_mode: QuirksMode,
}
74
75 impl Default for TreeBuilderOpts {
default() -> TreeBuilderOpts76 fn default() -> TreeBuilderOpts {
77 TreeBuilderOpts {
78 exact_errors: false,
79 scripting_enabled: true,
80 iframe_srcdoc: false,
81 drop_doctype: false,
82 ignore_missing_rules: false,
83 quirks_mode: NoQuirks,
84 }
85 }
86 }
87
/// The HTML tree builder.
///
/// Holds the parser state (insertion mode, stack of open elements, list of
/// active formatting elements, element pointers, assorted flags) together with
/// the `Sink` that receives the resulting tree modifications.
pub struct TreeBuilder<Handle, Sink> {
    /// Options controlling the behavior of the tree builder.
    opts: TreeBuilderOpts,

    /// Consumer of tree modifications.
    pub sink: Sink,

    /// Insertion mode.
    mode: InsertionMode,

    /// Original insertion mode, used by Text and InTableText modes.
    orig_mode: Option<InsertionMode>,

    /// Stack of template insertion modes.
    template_modes: Vec<InsertionMode>,

    /// Pending table character tokens.
    pending_table_text: Vec<(SplitStatus, StrTendril)>,

    /// Quirks mode as set by the parser.
    /// FIXME: can scripts etc. change this?
    quirks_mode: QuirksMode,

    /// The document node, which is created by the sink.
    doc_handle: Handle,

    /// Stack of open elements, most recently added at end.
    open_elems: Vec<Handle>,

    /// List of active formatting elements.
    active_formatting: Vec<FormatEntry<Handle>>,

    //§ the-element-pointers
    /// Head element pointer.
    head_elem: Option<Handle>,

    /// Form element pointer.
    form_elem: Option<Handle>,
    //§ END
    /// Frameset-ok flag.
    frameset_ok: bool,

    /// Ignore a following U+000A LINE FEED?
    ignore_lf: bool,

    /// Is foster parenting enabled?
    foster_parenting: bool,

    /// The context element for the fragment parsing algorithm.
    /// `None` when parsing a full document.
    context_elem: Option<Handle>,

    /// Track current line. Initialised to 1 by both constructors.
    current_line: u64,
    // WARNING: If you add new fields that contain Handles, you
    // must add them to trace_handles() below to preserve memory
    // safety!
    //
    // FIXME: Auto-generate the trace hooks like Servo does.
}
148
149 impl<Handle, Sink> TreeBuilder<Handle, Sink>
150 where
151 Handle: Clone,
152 Sink: TreeSink<Handle = Handle>,
153 {
154 /// Create a new tree builder which sends tree modifications to a particular `TreeSink`.
155 ///
156 /// The tree builder is also a `TokenSink`.
new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink>157 pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
158 let doc_handle = sink.get_document();
159 TreeBuilder {
160 opts: opts,
161 sink: sink,
162 mode: Initial,
163 orig_mode: None,
164 template_modes: vec![],
165 pending_table_text: vec![],
166 quirks_mode: opts.quirks_mode,
167 doc_handle: doc_handle,
168 open_elems: vec![],
169 active_formatting: vec![],
170 head_elem: None,
171 form_elem: None,
172 frameset_ok: true,
173 ignore_lf: false,
174 foster_parenting: false,
175 context_elem: None,
176 current_line: 1,
177 }
178 }
179
180 /// Create a new tree builder which sends tree modifications to a particular `TreeSink`.
181 /// This is for parsing fragments.
182 ///
183 /// The tree builder is also a `TokenSink`.
new_for_fragment( mut sink: Sink, context_elem: Handle, form_elem: Option<Handle>, opts: TreeBuilderOpts, ) -> TreeBuilder<Handle, Sink>184 pub fn new_for_fragment(
185 mut sink: Sink,
186 context_elem: Handle,
187 form_elem: Option<Handle>,
188 opts: TreeBuilderOpts,
189 ) -> TreeBuilder<Handle, Sink> {
190 let doc_handle = sink.get_document();
191 let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template");
192 let mut tb = TreeBuilder {
193 opts: opts,
194 sink: sink,
195 mode: Initial,
196 orig_mode: None,
197 template_modes: if context_is_template {
198 vec![InTemplate]
199 } else {
200 vec![]
201 },
202 pending_table_text: vec![],
203 quirks_mode: opts.quirks_mode,
204 doc_handle: doc_handle,
205 open_elems: vec![],
206 active_formatting: vec![],
207 head_elem: None,
208 form_elem: form_elem,
209 frameset_ok: true,
210 ignore_lf: false,
211 foster_parenting: false,
212 context_elem: Some(context_elem),
213 current_line: 1,
214 };
215
216 // https://html.spec.whatwg.org/multipage/#parsing-html-fragments
217 // 5. Let root be a new html element with no attributes.
218 // 6. Append the element root to the Document node created above.
219 // 7. Set up the parser's stack of open elements so that it contains just the single element root.
220 tb.create_root(vec![]);
221 // 10. Reset the parser's insertion mode appropriately.
222 tb.mode = tb.reset_insertion_mode();
223
224 tb
225 }
226
227 // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context
228 // Step 4. Set the state of the HTML parser's tokenization stage as follows:
tokenizer_state_for_context_elem(&self) -> tok_state::State229 pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
230 let elem = self.context_elem.as_ref().expect("no context element");
231 let name = match self.sink.elem_name(elem) {
232 ExpandedName {
233 ns: &ns!(html),
234 local,
235 } => local,
236 _ => return tok_state::Data,
237 };
238 match *name {
239 local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata),
240
241 local_name!("style") |
242 local_name!("xmp") |
243 local_name!("iframe") |
244 local_name!("noembed") |
245 local_name!("noframes") => tok_state::RawData(tok_state::Rawtext),
246
247 local_name!("script") => tok_state::RawData(tok_state::ScriptData),
248
249 local_name!("noscript") => {
250 if self.opts.scripting_enabled {
251 tok_state::RawData(tok_state::Rawtext)
252 } else {
253 tok_state::Data
254 }
255 },
256
257 local_name!("plaintext") => tok_state::Plaintext,
258
259 _ => tok_state::Data,
260 }
261 }
262
263 /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's
264 /// internal state. This is intended to support garbage-collected DOMs.
trace_handles(&self, tracer: &Tracer<Handle = Handle>)265 pub fn trace_handles(&self, tracer: &Tracer<Handle = Handle>) {
266 tracer.trace_handle(&self.doc_handle);
267 for e in &self.open_elems {
268 tracer.trace_handle(e);
269 }
270 for e in &self.active_formatting {
271 match e {
272 &Element(ref h, _) => tracer.trace_handle(h),
273 _ => (),
274 }
275 }
276 self.head_elem.as_ref().map(|h| tracer.trace_handle(h));
277 self.form_elem.as_ref().map(|h| tracer.trace_handle(h));
278 self.context_elem.as_ref().map(|h| tracer.trace_handle(h));
279 }
280
281 #[allow(dead_code)]
dump_state(&self, label: String)282 fn dump_state(&self, label: String) {
283 println!("dump_state on {}", label);
284 print!(" open_elems:");
285 for node in self.open_elems.iter() {
286 let name = self.sink.elem_name(node);
287 match *name.ns {
288 ns!(html) => print!(" {}", name.local),
289 _ => panic!(),
290 }
291 }
292 println!("");
293 print!(" active_formatting:");
294 for entry in self.active_formatting.iter() {
295 match entry {
296 &Marker => print!(" Marker"),
297 &Element(ref h, _) => {
298 let name = self.sink.elem_name(h);
299 match *name.ns {
300 ns!(html) => print!(" {}", name.local),
301 _ => panic!(),
302 }
303 },
304 }
305 }
306 println!("");
307 }
308
debug_step(&self, mode: InsertionMode, token: &Token)309 fn debug_step(&self, mode: InsertionMode, token: &Token) {
310 if log_enabled!(Level::Debug) {
311 debug!(
312 "processing {} in insertion mode {:?}",
313 to_escaped_string(token),
314 mode
315 );
316 }
317 }
318
process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle>319 fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> {
320 // Queue of additional tokens yet to be processed.
321 // This stays empty in the common case where we don't split whitespace.
322 let mut more_tokens = VecDeque::new();
323
324 loop {
325 let should_have_acknowledged_self_closing_flag = matches!(
326 token,
327 TagToken(Tag {
328 self_closing: true,
329 kind: StartTag,
330 ..
331 })
332 );
333 let result = if self.is_foreign(&token) {
334 self.step_foreign(token)
335 } else {
336 let mode = self.mode;
337 self.step(mode, token)
338 };
339 match result {
340 Done => {
341 if should_have_acknowledged_self_closing_flag {
342 self.sink
343 .parse_error(Borrowed("Unacknowledged self-closing tag"));
344 }
345 token = unwrap_or_return!(
346 more_tokens.pop_front(),
347 tokenizer::TokenSinkResult::Continue
348 );
349 },
350 DoneAckSelfClosing => {
351 token = unwrap_or_return!(
352 more_tokens.pop_front(),
353 tokenizer::TokenSinkResult::Continue
354 );
355 },
356 Reprocess(m, t) => {
357 self.mode = m;
358 token = t;
359 },
360 ReprocessForeign(t) => {
361 token = t;
362 },
363 SplitWhitespace(mut buf) => {
364 let p = buf.pop_front_char_run(is_ascii_whitespace);
365 let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue);
366 let status = if is_ws { Whitespace } else { NotWhitespace };
367 token = CharacterTokens(status, first);
368
369 if buf.len32() > 0 {
370 more_tokens.push_back(CharacterTokens(NotSplit, buf));
371 }
372 },
373 Script(node) => {
374 assert!(more_tokens.is_empty());
375 return tokenizer::TokenSinkResult::Script(node);
376 },
377 ToPlaintext => {
378 assert!(more_tokens.is_empty());
379 return tokenizer::TokenSinkResult::Plaintext;
380 },
381 ToRawData(k) => {
382 assert!(more_tokens.is_empty());
383 return tokenizer::TokenSinkResult::RawData(k);
384 },
385 }
386 }
387 }
388
/// Are we parsing a HTML fragment?
///
/// True exactly when a context element is set, which only
/// `new_for_fragment` does.
pub fn is_fragment(&self) -> bool {
    self.context_elem.is_some()
}
393
394 /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
appropriate_place_for_insertion( &mut self, override_target: Option<Handle>, ) -> InsertionPoint<Handle>395 fn appropriate_place_for_insertion(
396 &mut self,
397 override_target: Option<Handle>,
398 ) -> InsertionPoint<Handle> {
399 use self::tag_sets::*;
400
401 declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
402 let target = override_target.unwrap_or_else(|| self.current_node().clone());
403 if !(self.foster_parenting && self.elem_in(&target, foster_target)) {
404 if self.html_elem_named(&target, local_name!("template")) {
405 // No foster parenting (inside template).
406 let contents = self.sink.get_template_contents(&target);
407 return LastChild(contents);
408 } else {
409 // No foster parenting (the common case).
410 return LastChild(target);
411 }
412 }
413
414 // Foster parenting
415 let mut iter = self.open_elems.iter().rev().peekable();
416 while let Some(elem) = iter.next() {
417 if self.html_elem_named(&elem, local_name!("template")) {
418 let contents = self.sink.get_template_contents(&elem);
419 return LastChild(contents);
420 } else if self.html_elem_named(&elem, local_name!("table")) {
421 return TableFosterParenting {
422 element: elem.clone(),
423 prev_element: (*iter.peek().unwrap()).clone(),
424 };
425 }
426 }
427 let html_elem = self.html_elem();
428 LastChild(html_elem.clone())
429 }
430
insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>)431 fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) {
432 match insertion_point {
433 LastChild(parent) => self.sink.append(&parent, child),
434 BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child),
435 TableFosterParenting {
436 element,
437 prev_element,
438 } => self
439 .sink
440 .append_based_on_parent_node(&element, &prev_element, child),
441 }
442 }
443 }
444
445 impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink>
446 where
447 Handle: Clone,
448 Sink: TreeSink<Handle = Handle>,
449 {
450 type Handle = Handle;
451
process_token( &mut self, token: tokenizer::Token, line_number: u64, ) -> TokenSinkResult<Handle>452 fn process_token(
453 &mut self,
454 token: tokenizer::Token,
455 line_number: u64,
456 ) -> TokenSinkResult<Handle> {
457 if line_number != self.current_line {
458 self.sink.set_current_line(line_number);
459 }
460 let ignore_lf = replace(&mut self.ignore_lf, false);
461
462 // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type.
463 let token = match token {
464 tokenizer::ParseError(e) => {
465 self.sink.parse_error(e);
466 return tokenizer::TokenSinkResult::Continue;
467 },
468
469 tokenizer::DoctypeToken(dt) => {
470 if self.mode == Initial {
471 let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
472 if err {
473 self.sink.parse_error(format_if!(
474 self.opts.exact_errors,
475 "Bad DOCTYPE",
476 "Bad DOCTYPE: {:?}",
477 dt
478 ));
479 }
480 let Doctype {
481 name,
482 public_id,
483 system_id,
484 force_quirks: _,
485 } = dt;
486 if !self.opts.drop_doctype {
487 self.sink.append_doctype_to_document(
488 name.unwrap_or(StrTendril::new()),
489 public_id.unwrap_or(StrTendril::new()),
490 system_id.unwrap_or(StrTendril::new()),
491 );
492 }
493 self.set_quirks_mode(quirk);
494
495 self.mode = BeforeHtml;
496 return tokenizer::TokenSinkResult::Continue;
497 } else {
498 self.sink.parse_error(format_if!(
499 self.opts.exact_errors,
500 "DOCTYPE in body",
501 "DOCTYPE in insertion mode {:?}",
502 self.mode
503 ));
504 return tokenizer::TokenSinkResult::Continue;
505 }
506 },
507
508 tokenizer::TagToken(x) => TagToken(x),
509 tokenizer::CommentToken(x) => CommentToken(x),
510 tokenizer::NullCharacterToken => NullCharacterToken,
511 tokenizer::EOFToken => EOFToken,
512
513 tokenizer::CharacterTokens(mut x) => {
514 if ignore_lf && x.starts_with("\n") {
515 x.pop_front(1);
516 }
517 if x.is_empty() {
518 return tokenizer::TokenSinkResult::Continue;
519 }
520 CharacterTokens(NotSplit, x)
521 },
522 };
523
524 self.process_to_completion(token)
525 }
526
end(&mut self)527 fn end(&mut self) {
528 for elem in self.open_elems.drain(..).rev() {
529 self.sink.pop(&elem);
530 }
531 }
532
adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool533 fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
534 !self.open_elems.is_empty() &&
535 self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html)
536 }
537 }
538
/// Return the bottom entry of a stack of open elements (index 0).
///
/// # Panics
///
/// Panics if `open_elems` is empty.
pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
    let bottom = &open_elems[0];
    bottom
}
542
/// Iterator over the element entries of the active formatting list, walking
/// from the end of the list towards the front together with each entry's
/// index. See the `Iterator` impl for where it stops.
pub struct ActiveFormattingIter<'a, Handle: 'a> {
    iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>,
}
546
547 impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> {
548 type Item = (usize, &'a Handle, &'a Tag);
next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)>549 fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> {
550 match self.iter.next() {
551 None | Some((_, &Marker)) => None,
552 Some((i, &Element(ref h, ref t))) => Some((i, h, t)),
553 }
554 }
555 }
556
/// Flag passed to element insertion (see the `insert_element(Push, ..)` call
/// in `reconstruct_formatting`).
// NOTE(review): presumably selects whether the new element is also pushed
// onto the stack of open elements; the consumer is not visible here — confirm.
pub enum PushFlag {
    Push,
    NoPush,
}
561
/// Position in the list of active formatting elements, as tracked by
/// `adoption_agency`, where the replacement formatting entry is placed.
enum Bookmark<Handle> {
    /// Overwrite the entry that holds this handle.
    Replace(Handle),
    /// Insert directly after the entry that holds this handle.
    InsertAfter(Handle),
}
566
/// Build a `QualName` from literal tokens.
///
/// * `qualname!("", local)` produces a name with no prefix and the `ns!()`
///   namespace.
/// * `qualname!(prefix ns local)` produces a name with the given prefix and
///   namespace.
macro_rules! qualname {
    ("", $local:tt) => {
        QualName {
            prefix: None,
            ns: ns!(),
            local: local_name!($local),
        }
    };
    ($prefix: tt $ns:tt $local:tt) => {
        QualName {
            prefix: Some(namespace_prefix!($prefix)),
            ns: ns!($ns),
            local: local_name!($local),
        }
    };
}
583
584 #[doc(hidden)]
585 impl<Handle, Sink> TreeBuilder<Handle, Sink>
586 where
587 Handle: Clone,
588 Sink: TreeSink<Handle = Handle>,
589 {
unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle>590 fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> {
591 self.sink.parse_error(format_if!(
592 self.opts.exact_errors,
593 "Unexpected token",
594 "Unexpected token {} in insertion mode {:?}",
595 to_escaped_string(_thing),
596 self.mode
597 ));
598 Done
599 }
600
assert_named(&mut self, node: &Handle, name: LocalName)601 fn assert_named(&mut self, node: &Handle, name: LocalName) {
602 assert!(self.html_elem_named(&node, name));
603 }
604
605 /// Iterate over the active formatting elements (with index in the list) from the end
606 /// to the last marker, or the beginning if there are no markers.
active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle>607 fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> {
608 ActiveFormattingIter {
609 iter: self.active_formatting.iter().enumerate().rev(),
610 }
611 }
612
position_in_active_formatting(&self, element: &Handle) -> Option<usize>613 fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> {
614 self.active_formatting.iter().position(|n| match n {
615 &Marker => false,
616 &Element(ref handle, _) => self.sink.same_node(handle, element),
617 })
618 }
619
set_quirks_mode(&mut self, mode: QuirksMode)620 fn set_quirks_mode(&mut self, mode: QuirksMode) {
621 self.quirks_mode = mode;
622 self.sink.set_quirks_mode(mode);
623 }
624
/// Stop parsing. Performs no work of its own here; it simply reports the
/// current step as `Done`.
fn stop_parsing(&mut self) -> ProcessResult<Handle> {
    Done
}
628
629 //§ parsing-elements-that-contain-only-text
630 // Switch to `Text` insertion mode, save the old mode, and
631 // switch the tokenizer to a raw-data state.
632 // The latter only takes effect after the current / next
633 // `process_token` of a start tag returns!
to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle>634 fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle> {
635 self.orig_mode = Some(self.mode);
636 self.mode = Text;
637 ToRawData(k)
638 }
639
640 // The generic raw text / RCDATA parsing algorithm.
parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle>641 fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle> {
642 self.insert_element_for(tag);
643 self.to_raw_text_mode(k)
644 }
645 //§ END
646
current_node(&self) -> &Handle647 fn current_node(&self) -> &Handle {
648 self.open_elems.last().expect("no current element")
649 }
650
adjusted_current_node(&self) -> &Handle651 fn adjusted_current_node(&self) -> &Handle {
652 if self.open_elems.len() == 1 {
653 if let Some(ctx) = self.context_elem.as_ref() {
654 return ctx;
655 }
656 }
657 self.current_node()
658 }
659
current_node_in<TagSet>(&self, set: TagSet) -> bool where TagSet: Fn(ExpandedName) -> bool,660 fn current_node_in<TagSet>(&self, set: TagSet) -> bool
661 where
662 TagSet: Fn(ExpandedName) -> bool,
663 {
664 set(self.sink.elem_name(self.current_node()))
665 }
666
667 // Insert at the "appropriate place for inserting a node".
insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>)668 fn insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>) {
669 let insertion_point = self.appropriate_place_for_insertion(override_target);
670 self.insert_at(insertion_point, child);
671 }
672
/// Run the adoption agency algorithm for an end tag named `subject`.
///
/// Rearranges the tree, the stack of open elements and the list of active
/// formatting elements for a formatting element that is closed while other
/// elements opened after it are still open.
///
/// The bare step numbers in the comments are carried over from the original
/// code and presumably follow the HTML specification's numbering of this
/// algorithm — verify against the spec revision in use.
fn adoption_agency(&mut self, subject: LocalName) {
    // 1. If the current node is an HTML element named `subject` that is not
    // in the list of active formatting elements, popping it is all there is
    // to do.
    if self.current_node_named(subject.clone()) {
        if self
            .position_in_active_formatting(self.current_node())
            .is_none()
        {
            self.pop();
            return;
        }
    }

    // 2. 3. 4. The outer loop runs at most eight times.
    for _ in 0..8 {
        // 5. Find the most recent formatting entry after the last marker
        // whose tag name is `subject`. If there is none, the end tag is
        // handed to `process_end_tag_in_body` and the algorithm stops
        // (assuming `unwrap_or_return!` runs its second argument and returns
        // on `None`).
        let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!(
            // We clone the Handle and Tag so they don't cause an immutable borrow of self.
            self.active_formatting_end_to_marker()
                .filter(|&(_, _, tag)| tag.name == subject)
                .next()
                .map(|(i, h, t)| (i, h.clone(), t.clone())),
            {
                self.process_end_tag_in_body(Tag {
                    kind: EndTag,
                    name: subject,
                    self_closing: false,
                    attrs: vec![],
                });
            }
        );

        // Locate the formatting element on the stack of open elements. If
        // it is not open, report an error and drop its formatting entry.
        let fmt_elem_stack_index = unwrap_or_return!(
            self.open_elems
                .iter()
                .rposition(|n| self.sink.same_node(n, &fmt_elem)),
            {
                self.sink
                    .parse_error(Borrowed("Formatting element not open"));
                self.active_formatting.remove(fmt_elem_index);
            }
        );

        // 7. The formatting element is open but not in (default) scope:
        // report an error and give up.
        if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) {
            self.sink
                .parse_error(Borrowed("Formatting element not in scope"));
            return;
        }

        // 8. Report an error (but carry on) if the formatting element is
        // not the current node.
        if !self.sink.same_node(self.current_node(), &fmt_elem) {
            self.sink
                .parse_error(Borrowed("Formatting element not current node"));
        }

        // 9. The furthest block is the first element at or after the
        // formatting element's stack position that is in `special_tag`.
        let (furthest_block_index, furthest_block) = unwrap_or_return!(
            self.open_elems
                .iter()
                .enumerate()
                .skip(fmt_elem_stack_index)
                .filter(|&(_, open_element)| self.elem_in(open_element, special_tag))
                .next()
                .map(|(i, h)| (i, h.clone())),
            // 10. No furthest block: cut the stack back so the formatting
            // element and everything after it is gone, and remove the
            // formatting entry.
            {
                self.open_elems.truncate(fmt_elem_stack_index);
                self.active_formatting.remove(fmt_elem_index);
            }
        );

        // 11. The common ancestor is the element just before the formatting
        // element on the stack.
        let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone();

        // 12. The bookmark starts out pointing at the formatting element's
        // own entry.
        let mut bookmark = Bookmark::Replace(fmt_elem.clone());

        // 13. Inner loop: walk from the furthest block back towards the
        // formatting element.
        let mut node;
        let mut node_index = furthest_block_index;
        let mut last_node = furthest_block.clone();

        // 13.1.
        let mut inner_counter = 0;
        loop {
            // 13.2.
            inner_counter += 1;

            // 13.3. Step to the previous entry on the stack.
            node_index -= 1;
            node = self.open_elems[node_index].clone();

            // 13.4. Reached the formatting element: the inner loop is done.
            if self.sink.same_node(&node, &fmt_elem) {
                break;
            }

            // 13.5. After three iterations, nodes are dropped from both the
            // active formatting list (if present) and the stack.
            if inner_counter > 3 {
                self.position_in_active_formatting(&node)
                    .map(|position| self.active_formatting.remove(position));
                self.open_elems.remove(node_index);
                continue;
            }

            let node_formatting_index = unwrap_or_else!(
                self.position_in_active_formatting(&node),
                // 13.6. Not a formatting element: drop it from the stack.
                {
                    self.open_elems.remove(node_index);
                    continue;
                }
            );

            // 13.7. Create a fresh element from the node's tag and put it in
            // the node's place, both in the formatting list and on the stack.
            let tag = match self.active_formatting[node_formatting_index] {
                Element(ref h, ref t) => {
                    assert!(self.sink.same_node(h, &node));
                    t.clone()
                },
                Marker => panic!("Found marker during adoption agency"),
            };
            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their
            // own, once as part of t.clone() above)?
            let new_element = create_element(
                &mut self.sink,
                QualName::new(None, ns!(html), tag.name.clone()),
                tag.attrs.clone(),
            );
            self.open_elems[node_index] = new_element.clone();
            self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag);
            node = new_element;

            // 13.8. On the first reparenting (last node is still the
            // furthest block) move the bookmark to just after the new node.
            if self.sink.same_node(&last_node, &furthest_block) {
                bookmark = Bookmark::InsertAfter(node.clone());
            }

            // 13.9. Move last node under the new node.
            self.sink.remove_from_parent(&last_node);
            self.sink.append(&node, AppendNode(last_node.clone()));

            // 13.10.
            last_node = node.clone();

            // 13.11. Back to the top of the inner loop.
        }

        // 14. Insert last node at the appropriate place, using the common
        // ancestor as the override target.
        self.sink.remove_from_parent(&last_node);
        self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor));

        // 15. Create a fresh element from the formatting element's tag.
        // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
        // once as part of t.clone() above)?
        let new_element = create_element(
            &mut self.sink,
            QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
            fmt_elem_tag.attrs.clone(),
        );
        let new_entry = Element(new_element.clone(), fmt_elem_tag);

        // 16. Move the furthest block's children under the new element.
        self.sink.reparent_children(&furthest_block, &new_element);

        // 17. Append the new element to the furthest block.
        self.sink
            .append(&furthest_block, AppendNode(new_element.clone()));

        // 18. Put the new entry at the bookmark and make sure the old
        // formatting entry is gone from the list.
        // FIXME: We could probably get rid of the position_in_active_formatting() calls here
        // if we had a more clever Bookmark representation.
        match bookmark {
            Bookmark::Replace(to_replace) => {
                let index = self
                    .position_in_active_formatting(&to_replace)
                    .expect("bookmark not found in active formatting elements");
                self.active_formatting[index] = new_entry;
            },
            Bookmark::InsertAfter(previous) => {
                let index = self
                    .position_in_active_formatting(&previous)
                    .expect("bookmark not found in active formatting elements") +
                    1;
                self.active_formatting.insert(index, new_entry);
                let old_index = self
                    .position_in_active_formatting(&fmt_elem)
                    .expect("formatting element not found in active formatting elements");
                self.active_formatting.remove(old_index);
            },
        }

        // 19. Take the formatting element off the stack and put the new
        // element directly after the furthest block.
        self.remove_from_stack(&fmt_elem);
        let new_furthest_block_index = self
            .open_elems
            .iter()
            .position(|n| self.sink.same_node(n, &furthest_block))
            .expect("furthest block missing from open element stack");
        self.open_elems
            .insert(new_furthest_block_index + 1, new_element);

        // 20. Next iteration of the outer loop.
    }
}
878
push(&mut self, elem: &Handle)879 fn push(&mut self, elem: &Handle) {
880 self.open_elems.push(elem.clone());
881 }
882
pop(&mut self) -> Handle883 fn pop(&mut self) -> Handle {
884 let elem = self.open_elems.pop().expect("no current element");
885 self.sink.pop(&elem);
886 elem
887 }
888
remove_from_stack(&mut self, elem: &Handle)889 fn remove_from_stack(&mut self, elem: &Handle) {
890 let sink = &mut self.sink;
891 let position = self
892 .open_elems
893 .iter()
894 .rposition(|x| sink.same_node(elem, &x));
895 if let Some(position) = position {
896 self.open_elems.remove(position);
897 sink.pop(elem);
898 }
899 }
900
is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool901 fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool {
902 match *entry {
903 Marker => true,
904 Element(ref node, _) => self
905 .open_elems
906 .iter()
907 .rev()
908 .any(|n| self.sink.same_node(&n, &node)),
909 }
910 }
911
912 /// Reconstruct the active formatting elements.
reconstruct_formatting(&mut self)913 fn reconstruct_formatting(&mut self) {
914 {
915 let last = unwrap_or_return!(self.active_formatting.last(), ());
916 if self.is_marker_or_open(last) {
917 return;
918 }
919 }
920
921 let mut entry_index = self.active_formatting.len() - 1;
922 loop {
923 if entry_index == 0 {
924 break;
925 }
926 entry_index -= 1;
927 if self.is_marker_or_open(&self.active_formatting[entry_index]) {
928 entry_index += 1;
929 break;
930 }
931 }
932
933 loop {
934 let tag = match self.active_formatting[entry_index] {
935 Element(_, ref t) => t.clone(),
936 Marker => panic!("Found marker during formatting element reconstruction"),
937 };
938
939 // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
940 // once as part of t.clone() above)?
941 let new_element =
942 self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone());
943 self.active_formatting[entry_index] = Element(new_element, tag);
944 if entry_index == self.active_formatting.len() - 1 {
945 break;
946 }
947 entry_index += 1;
948 }
949 }
950
951 /// Get the first element on the stack, which will be the <html> element.
html_elem(&self) -> &Handle952 fn html_elem(&self) -> &Handle {
953 &self.open_elems[0]
954 }
955
956 /// Get the second element on the stack, if it's a HTML body element.
body_elem(&self) -> Option<&Handle>957 fn body_elem(&self) -> Option<&Handle> {
958 if self.open_elems.len() <= 1 {
959 return None;
960 }
961
962 let node = &self.open_elems[1];
963 if self.html_elem_named(node, local_name!("body")) {
964 Some(node)
965 } else {
966 None
967 }
968 }
969
970 /// Signal an error depending on the state of the stack of open elements at
971 /// the end of the body.
check_body_end(&mut self)972 fn check_body_end(&mut self) {
973 declare_tag_set!(body_end_ok =
974 "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th"
975 "thead" "tr" "body" "html");
976
977 for elem in self.open_elems.iter() {
978 let error;
979 {
980 let name = self.sink.elem_name(elem);
981 if body_end_ok(name) {
982 continue;
983 }
984 error = format_if!(
985 self.opts.exact_errors,
986 "Unexpected open tag at end of body",
987 "Unexpected open tag {:?} at end of body",
988 name
989 );
990 }
991 self.sink.parse_error(error);
992 // FIXME: Do we keep checking after finding one bad tag?
993 // The spec suggests not.
994 return;
995 }
996 }
997
in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool where TagSet: Fn(ExpandedName) -> bool, Pred: Fn(Handle) -> bool,998 fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool
999 where
1000 TagSet: Fn(ExpandedName) -> bool,
1001 Pred: Fn(Handle) -> bool,
1002 {
1003 for node in self.open_elems.iter().rev() {
1004 if pred(node.clone()) {
1005 return true;
1006 }
1007 if scope(self.sink.elem_name(node)) {
1008 return false;
1009 }
1010 }
1011
1012 // supposed to be impossible, because <html> is always in scope
1013
1014 false
1015 }
1016
elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool where TagSet: Fn(ExpandedName) -> bool,1017 fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool
1018 where
1019 TagSet: Fn(ExpandedName) -> bool,
1020 {
1021 set(self.sink.elem_name(elem))
1022 }
1023
html_elem_named(&self, elem: &Handle, name: LocalName) -> bool1024 fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool {
1025 let expanded = self.sink.elem_name(elem);
1026 *expanded.ns == ns!(html) && *expanded.local == name
1027 }
1028
in_html_elem_named(&self, name: LocalName) -> bool1029 fn in_html_elem_named(&self, name: LocalName) -> bool {
1030 self.open_elems
1031 .iter()
1032 .any(|elem| self.html_elem_named(elem, name.clone()))
1033 }
1034
current_node_named(&self, name: LocalName) -> bool1035 fn current_node_named(&self, name: LocalName) -> bool {
1036 self.html_elem_named(self.current_node(), name)
1037 }
1038
in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool where TagSet: Fn(ExpandedName) -> bool,1039 fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool
1040 where
1041 TagSet: Fn(ExpandedName) -> bool,
1042 {
1043 self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone()))
1044 }
1045
1046 //§ closing-elements-that-have-implied-end-tags
generate_implied_end<TagSet>(&mut self, set: TagSet) where TagSet: Fn(ExpandedName) -> bool,1047 fn generate_implied_end<TagSet>(&mut self, set: TagSet)
1048 where
1049 TagSet: Fn(ExpandedName) -> bool,
1050 {
1051 loop {
1052 {
1053 let elem = unwrap_or_return!(self.open_elems.last(), ());
1054 let nsname = self.sink.elem_name(elem);
1055 if !set(nsname) {
1056 return;
1057 }
1058 }
1059 self.pop();
1060 }
1061 }
1062
generate_implied_end_except(&mut self, except: LocalName)1063 fn generate_implied_end_except(&mut self, except: LocalName) {
1064 self.generate_implied_end(|p| {
1065 if *p.ns == ns!(html) && *p.local == except {
1066 false
1067 } else {
1068 cursory_implied_end(p)
1069 }
1070 });
1071 }
1072 //§ END
1073
1074 // Pop elements until the current element is in the set.
pop_until_current<TagSet>(&mut self, pred: TagSet) where TagSet: Fn(ExpandedName) -> bool,1075 fn pop_until_current<TagSet>(&mut self, pred: TagSet)
1076 where
1077 TagSet: Fn(ExpandedName) -> bool,
1078 {
1079 loop {
1080 if self.current_node_in(|x| pred(x)) {
1081 break;
1082 }
1083 self.open_elems.pop();
1084 }
1085 }
1086
1087 // Pop elements until an element from the set has been popped. Returns the
1088 // number of elements popped.
pop_until<P>(&mut self, pred: P) -> usize where P: Fn(ExpandedName) -> bool,1089 fn pop_until<P>(&mut self, pred: P) -> usize
1090 where
1091 P: Fn(ExpandedName) -> bool,
1092 {
1093 let mut n = 0;
1094 loop {
1095 n += 1;
1096 match self.open_elems.pop() {
1097 None => break,
1098 Some(elem) => {
1099 if pred(self.sink.elem_name(&elem)) {
1100 break;
1101 }
1102 },
1103 }
1104 }
1105 n
1106 }
1107
pop_until_named(&mut self, name: LocalName) -> usize1108 fn pop_until_named(&mut self, name: LocalName) -> usize {
1109 self.pop_until(|p| *p.ns == ns!(html) && *p.local == name)
1110 }
1111
1112 // Pop elements until one with the specified name has been popped.
1113 // Signal an error if it was not the first one.
expect_to_close(&mut self, name: LocalName)1114 fn expect_to_close(&mut self, name: LocalName) {
1115 if self.pop_until_named(name.clone()) != 1 {
1116 self.sink.parse_error(format_if!(
1117 self.opts.exact_errors,
1118 "Unexpected open element",
1119 "Unexpected open element while closing {:?}",
1120 name
1121 ));
1122 }
1123 }
1124
close_p_element(&mut self)1125 fn close_p_element(&mut self) {
1126 declare_tag_set!(implied = [cursory_implied_end] - "p");
1127 self.generate_implied_end(implied);
1128 self.expect_to_close(local_name!("p"));
1129 }
1130
close_p_element_in_button_scope(&mut self)1131 fn close_p_element_in_button_scope(&mut self) {
1132 if self.in_scope_named(button_scope, local_name!("p")) {
1133 self.close_p_element();
1134 }
1135 }
1136
1137 // Check <input> tags for type=hidden
is_type_hidden(&self, tag: &Tag) -> bool1138 fn is_type_hidden(&self, tag: &Tag) -> bool {
1139 match tag
1140 .attrs
1141 .iter()
1142 .find(|&at| at.name.expanded() == expanded_name!("", "type"))
1143 {
1144 None => false,
1145 Some(at) => (&*at.value).eq_ignore_ascii_case("hidden"),
1146 }
1147 }
1148
foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle>1149 fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle> {
1150 warn!("foster parenting not implemented");
1151 self.foster_parenting = true;
1152 let res = self.step(InBody, token);
1153 // FIXME: what if res is Reprocess?
1154 self.foster_parenting = false;
1155 res
1156 }
1157
process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle>1158 fn process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle> {
1159 declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr");
1160 if self.current_node_in(table_outer) {
1161 assert!(self.pending_table_text.is_empty());
1162 self.orig_mode = Some(self.mode);
1163 Reprocess(InTableText, token)
1164 } else {
1165 self.sink.parse_error(format_if!(
1166 self.opts.exact_errors,
1167 "Unexpected characters in table",
1168 "Unexpected characters {} in table",
1169 to_escaped_string(&token)
1170 ));
1171 self.foster_parent_in_body(token)
1172 }
1173 }
1174
1175 // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately
reset_insertion_mode(&mut self) -> InsertionMode1176 fn reset_insertion_mode(&mut self) -> InsertionMode {
1177 for (i, mut node) in self.open_elems.iter().enumerate().rev() {
1178 let last = i == 0usize;
1179 if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) {
1180 node = ctx;
1181 }
1182 let name = match self.sink.elem_name(node) {
1183 ExpandedName {
1184 ns: &ns!(html),
1185 local,
1186 } => local,
1187 _ => continue,
1188 };
1189 match *name {
1190 local_name!("select") => {
1191 for ancestor in self.open_elems[0..i].iter().rev() {
1192 if self.html_elem_named(ancestor, local_name!("template")) {
1193 return InSelect;
1194 } else if self.html_elem_named(ancestor, local_name!("table")) {
1195 return InSelectInTable;
1196 }
1197 }
1198 return InSelect;
1199 },
1200 local_name!("td") | local_name!("th") => {
1201 if !last {
1202 return InCell;
1203 }
1204 },
1205 local_name!("tr") => return InRow,
1206 local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => {
1207 return InTableBody;
1208 },
1209 local_name!("caption") => return InCaption,
1210 local_name!("colgroup") => return InColumnGroup,
1211 local_name!("table") => return InTable,
1212 local_name!("template") => return *self.template_modes.last().unwrap(),
1213 local_name!("head") => {
1214 if !last {
1215 return InHead;
1216 }
1217 },
1218 local_name!("body") => return InBody,
1219 local_name!("frameset") => return InFrameset,
1220 local_name!("html") => match self.head_elem {
1221 None => return BeforeHead,
1222 Some(_) => return AfterHead,
1223 },
1224
1225 _ => (),
1226 }
1227 }
1228 InBody
1229 }
1230
close_the_cell(&mut self)1231 fn close_the_cell(&mut self) {
1232 self.generate_implied_end(cursory_implied_end);
1233 if self.pop_until(td_th) != 1 {
1234 self.sink
1235 .parse_error(Borrowed("expected to close <td> or <th> with cell"));
1236 }
1237 self.clear_active_formatting_to_marker();
1238 }
1239
append_text(&mut self, text: StrTendril) -> ProcessResult<Handle>1240 fn append_text(&mut self, text: StrTendril) -> ProcessResult<Handle> {
1241 self.insert_appropriately(AppendText(text), None);
1242 Done
1243 }
1244
append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle>1245 fn append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle> {
1246 let comment = self.sink.create_comment(text);
1247 self.insert_appropriately(AppendNode(comment), None);
1248 Done
1249 }
1250
append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle>1251 fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle> {
1252 let comment = self.sink.create_comment(text);
1253 self.sink.append(&self.doc_handle, AppendNode(comment));
1254 Done
1255 }
1256
append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle>1257 fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle> {
1258 let target = html_elem(&self.open_elems);
1259 let comment = self.sink.create_comment(text);
1260 self.sink.append(target, AppendNode(comment));
1261 Done
1262 }
1263
1264 //§ creating-and-inserting-nodes
create_root(&mut self, attrs: Vec<Attribute>)1265 fn create_root(&mut self, attrs: Vec<Attribute>) {
1266 let elem = create_element(
1267 &mut self.sink,
1268 QualName::new(None, ns!(html), local_name!("html")),
1269 attrs,
1270 );
1271 self.push(&elem);
1272 self.sink.append(&self.doc_handle, AppendNode(elem));
1273 // FIXME: application cache selection algorithm
1274 }
1275
1276 // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token
insert_element( &mut self, push: PushFlag, ns: Namespace, name: LocalName, attrs: Vec<Attribute>, ) -> Handle1277 fn insert_element(
1278 &mut self,
1279 push: PushFlag,
1280 ns: Namespace,
1281 name: LocalName,
1282 attrs: Vec<Attribute>,
1283 ) -> Handle {
1284 declare_tag_set!(form_associatable =
1285 "button" "fieldset" "input" "object"
1286 "output" "select" "textarea" "img");
1287
1288 declare_tag_set!(listed = [form_associatable] - "img");
1289
1290 // Step 7.
1291 let qname = QualName::new(None, ns, name);
1292 let elem = create_element(&mut self.sink, qname.clone(), attrs.clone());
1293
1294 let insertion_point = self.appropriate_place_for_insertion(None);
1295 let (node1, node2) = match insertion_point {
1296 LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None),
1297 TableFosterParenting {
1298 ref element,
1299 ref prev_element,
1300 } => (element.clone(), Some(prev_element.clone())),
1301 };
1302
1303 // Step 12.
1304 if form_associatable(qname.expanded()) &&
1305 self.form_elem.is_some() &&
1306 !self.in_html_elem_named(local_name!("template")) &&
1307 !(listed(qname.expanded()) &&
1308 attrs
1309 .iter()
1310 .any(|a| a.name.expanded() == expanded_name!("", "form")))
1311 {
1312 let form = self.form_elem.as_ref().unwrap().clone();
1313 let node2 = match node2 {
1314 Some(ref n) => Some(n),
1315 None => None,
1316 };
1317 self.sink.associate_with_form(&elem, &form, (&node1, node2));
1318 }
1319
1320 self.insert_at(insertion_point, AppendNode(elem.clone()));
1321
1322 match push {
1323 Push => self.push(&elem),
1324 NoPush => (),
1325 }
1326 // FIXME: Remove from the stack if we can't append?
1327 elem
1328 }
1329
insert_element_for(&mut self, tag: Tag) -> Handle1330 fn insert_element_for(&mut self, tag: Tag) -> Handle {
1331 self.insert_element(Push, ns!(html), tag.name, tag.attrs)
1332 }
1333
insert_and_pop_element_for(&mut self, tag: Tag) -> Handle1334 fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle {
1335 self.insert_element(NoPush, ns!(html), tag.name, tag.attrs)
1336 }
1337
insert_phantom(&mut self, name: LocalName) -> Handle1338 fn insert_phantom(&mut self, name: LocalName) -> Handle {
1339 self.insert_element(Push, ns!(html), name, vec![])
1340 }
1341 //§ END
1342
create_formatting_element_for(&mut self, tag: Tag) -> Handle1343 fn create_formatting_element_for(&mut self, tag: Tag) -> Handle {
1344 // FIXME: This really wants unit tests.
1345 let mut first_match = None;
1346 let mut matches = 0usize;
1347 for (i, _, old_tag) in self.active_formatting_end_to_marker() {
1348 if tag.equiv_modulo_attr_order(old_tag) {
1349 first_match = Some(i);
1350 matches += 1;
1351 }
1352 }
1353
1354 if matches >= 3 {
1355 self.active_formatting
1356 .remove(first_match.expect("matches with no index"));
1357 }
1358
1359 let elem = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone());
1360 self.active_formatting.push(Element(elem.clone(), tag));
1361 elem
1362 }
1363
clear_active_formatting_to_marker(&mut self)1364 fn clear_active_formatting_to_marker(&mut self) {
1365 loop {
1366 match self.active_formatting.pop() {
1367 None | Some(Marker) => break,
1368 _ => (),
1369 }
1370 }
1371 }
1372
process_end_tag_in_body(&mut self, tag: Tag)1373 fn process_end_tag_in_body(&mut self, tag: Tag) {
1374 // Look back for a matching open element.
1375 let mut match_idx = None;
1376 for (i, elem) in self.open_elems.iter().enumerate().rev() {
1377 if self.html_elem_named(elem, tag.name.clone()) {
1378 match_idx = Some(i);
1379 break;
1380 }
1381
1382 if self.elem_in(elem, special_tag) {
1383 self.sink
1384 .parse_error(Borrowed("Found special tag while closing generic tag"));
1385 return;
1386 }
1387 }
1388
1389 // Can't use unwrap_or_return!() due to rust-lang/rust#16617.
1390 let match_idx = match match_idx {
1391 None => {
1392 // I believe this is impossible, because the root
1393 // <html> element is in special_tag.
1394 self.unexpected(&tag);
1395 return;
1396 },
1397 Some(x) => x,
1398 };
1399
1400 self.generate_implied_end_except(tag.name.clone());
1401
1402 if match_idx != self.open_elems.len() - 1 {
1403 // mis-nested tags
1404 self.unexpected(&tag);
1405 }
1406 self.open_elems.truncate(match_idx);
1407 }
1408
handle_misnested_a_tags(&mut self, tag: &Tag)1409 fn handle_misnested_a_tags(&mut self, tag: &Tag) {
1410 let node = unwrap_or_return!(
1411 self.active_formatting_end_to_marker()
1412 .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a")))
1413 .next()
1414 .map(|(_, n, _)| n.clone()),
1415 ()
1416 );
1417
1418 self.unexpected(tag);
1419 self.adoption_agency(local_name!("a"));
1420 self.position_in_active_formatting(&node)
1421 .map(|index| self.active_formatting.remove(index));
1422 self.remove_from_stack(&node);
1423 }
1424
1425 //§ tree-construction
is_foreign(&mut self, token: &Token) -> bool1426 fn is_foreign(&mut self, token: &Token) -> bool {
1427 if let EOFToken = *token {
1428 return false;
1429 }
1430
1431 if self.open_elems.len() == 0 {
1432 return false;
1433 }
1434
1435 let name = self.sink.elem_name(self.adjusted_current_node());
1436 if let ns!(html) = *name.ns {
1437 return false;
1438 }
1439
1440 if mathml_text_integration_point(name) {
1441 match *token {
1442 CharacterTokens(..) | NullCharacterToken => return false,
1443 TagToken(Tag {
1444 kind: StartTag,
1445 ref name,
1446 ..
1447 }) if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => {
1448 return false;
1449 },
1450 _ => (),
1451 }
1452 }
1453
1454 if svg_html_integration_point(name) {
1455 match *token {
1456 CharacterTokens(..) | NullCharacterToken => return false,
1457 TagToken(Tag { kind: StartTag, .. }) => return false,
1458 _ => (),
1459 }
1460 }
1461
1462 if let expanded_name!(mathml "annotation-xml") = name {
1463 match *token {
1464 TagToken(Tag {
1465 kind: StartTag,
1466 name: local_name!("svg"),
1467 ..
1468 }) => return false,
1469 CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. }) => {
1470 return !self
1471 .sink
1472 .is_mathml_annotation_xml_integration_point(self.adjusted_current_node());
1473 },
1474 _ => {},
1475 };
1476 }
1477
1478 true
1479 }
1480 //§ END
1481
enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle>1482 fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> {
1483 match ns {
1484 ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
1485 ns!(svg) => self.adjust_svg_attributes(&mut tag),
1486 _ => (),
1487 }
1488 self.adjust_foreign_attributes(&mut tag);
1489
1490 if tag.self_closing {
1491 self.insert_element(NoPush, ns, tag.name, tag.attrs);
1492 DoneAckSelfClosing
1493 } else {
1494 self.insert_element(Push, ns, tag.name, tag.attrs);
1495 Done
1496 }
1497 }
1498
adjust_svg_tag_name(&mut self, tag: &mut Tag)1499 fn adjust_svg_tag_name(&mut self, tag: &mut Tag) {
1500 let Tag { ref mut name, .. } = *tag;
1501 match *name {
1502 local_name!("altglyph") => *name = local_name!("altGlyph"),
1503 local_name!("altglyphdef") => *name = local_name!("altGlyphDef"),
1504 local_name!("altglyphitem") => *name = local_name!("altGlyphItem"),
1505 local_name!("animatecolor") => *name = local_name!("animateColor"),
1506 local_name!("animatemotion") => *name = local_name!("animateMotion"),
1507 local_name!("animatetransform") => *name = local_name!("animateTransform"),
1508 local_name!("clippath") => *name = local_name!("clipPath"),
1509 local_name!("feblend") => *name = local_name!("feBlend"),
1510 local_name!("fecolormatrix") => *name = local_name!("feColorMatrix"),
1511 local_name!("fecomponenttransfer") => *name = local_name!("feComponentTransfer"),
1512 local_name!("fecomposite") => *name = local_name!("feComposite"),
1513 local_name!("feconvolvematrix") => *name = local_name!("feConvolveMatrix"),
1514 local_name!("fediffuselighting") => *name = local_name!("feDiffuseLighting"),
1515 local_name!("fedisplacementmap") => *name = local_name!("feDisplacementMap"),
1516 local_name!("fedistantlight") => *name = local_name!("feDistantLight"),
1517 local_name!("fedropshadow") => *name = local_name!("feDropShadow"),
1518 local_name!("feflood") => *name = local_name!("feFlood"),
1519 local_name!("fefunca") => *name = local_name!("feFuncA"),
1520 local_name!("fefuncb") => *name = local_name!("feFuncB"),
1521 local_name!("fefuncg") => *name = local_name!("feFuncG"),
1522 local_name!("fefuncr") => *name = local_name!("feFuncR"),
1523 local_name!("fegaussianblur") => *name = local_name!("feGaussianBlur"),
1524 local_name!("feimage") => *name = local_name!("feImage"),
1525 local_name!("femerge") => *name = local_name!("feMerge"),
1526 local_name!("femergenode") => *name = local_name!("feMergeNode"),
1527 local_name!("femorphology") => *name = local_name!("feMorphology"),
1528 local_name!("feoffset") => *name = local_name!("feOffset"),
1529 local_name!("fepointlight") => *name = local_name!("fePointLight"),
1530 local_name!("fespecularlighting") => *name = local_name!("feSpecularLighting"),
1531 local_name!("fespotlight") => *name = local_name!("feSpotLight"),
1532 local_name!("fetile") => *name = local_name!("feTile"),
1533 local_name!("feturbulence") => *name = local_name!("feTurbulence"),
1534 local_name!("foreignobject") => *name = local_name!("foreignObject"),
1535 local_name!("glyphref") => *name = local_name!("glyphRef"),
1536 local_name!("lineargradient") => *name = local_name!("linearGradient"),
1537 local_name!("radialgradient") => *name = local_name!("radialGradient"),
1538 local_name!("textpath") => *name = local_name!("textPath"),
1539 _ => (),
1540 }
1541 }
1542
adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F) where F: FnMut(LocalName) -> Option<QualName>,1543 fn adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F)
1544 where
1545 F: FnMut(LocalName) -> Option<QualName>,
1546 {
1547 for &mut Attribute { ref mut name, .. } in &mut tag.attrs {
1548 if let Some(replacement) = map(name.local.clone()) {
1549 *name = replacement;
1550 }
1551 }
1552 }
1553
adjust_svg_attributes(&mut self, tag: &mut Tag)1554 fn adjust_svg_attributes(&mut self, tag: &mut Tag) {
1555 self.adjust_attributes(tag, |k| match k {
1556 local_name!("attributename") => Some(qualname!("", "attributeName")),
1557 local_name!("attributetype") => Some(qualname!("", "attributeType")),
1558 local_name!("basefrequency") => Some(qualname!("", "baseFrequency")),
1559 local_name!("baseprofile") => Some(qualname!("", "baseProfile")),
1560 local_name!("calcmode") => Some(qualname!("", "calcMode")),
1561 local_name!("clippathunits") => Some(qualname!("", "clipPathUnits")),
1562 local_name!("diffuseconstant") => Some(qualname!("", "diffuseConstant")),
1563 local_name!("edgemode") => Some(qualname!("", "edgeMode")),
1564 local_name!("filterunits") => Some(qualname!("", "filterUnits")),
1565 local_name!("glyphref") => Some(qualname!("", "glyphRef")),
1566 local_name!("gradienttransform") => Some(qualname!("", "gradientTransform")),
1567 local_name!("gradientunits") => Some(qualname!("", "gradientUnits")),
1568 local_name!("kernelmatrix") => Some(qualname!("", "kernelMatrix")),
1569 local_name!("kernelunitlength") => Some(qualname!("", "kernelUnitLength")),
1570 local_name!("keypoints") => Some(qualname!("", "keyPoints")),
1571 local_name!("keysplines") => Some(qualname!("", "keySplines")),
1572 local_name!("keytimes") => Some(qualname!("", "keyTimes")),
1573 local_name!("lengthadjust") => Some(qualname!("", "lengthAdjust")),
1574 local_name!("limitingconeangle") => Some(qualname!("", "limitingConeAngle")),
1575 local_name!("markerheight") => Some(qualname!("", "markerHeight")),
1576 local_name!("markerunits") => Some(qualname!("", "markerUnits")),
1577 local_name!("markerwidth") => Some(qualname!("", "markerWidth")),
1578 local_name!("maskcontentunits") => Some(qualname!("", "maskContentUnits")),
1579 local_name!("maskunits") => Some(qualname!("", "maskUnits")),
1580 local_name!("numoctaves") => Some(qualname!("", "numOctaves")),
1581 local_name!("pathlength") => Some(qualname!("", "pathLength")),
1582 local_name!("patterncontentunits") => Some(qualname!("", "patternContentUnits")),
1583 local_name!("patterntransform") => Some(qualname!("", "patternTransform")),
1584 local_name!("patternunits") => Some(qualname!("", "patternUnits")),
1585 local_name!("pointsatx") => Some(qualname!("", "pointsAtX")),
1586 local_name!("pointsaty") => Some(qualname!("", "pointsAtY")),
1587 local_name!("pointsatz") => Some(qualname!("", "pointsAtZ")),
1588 local_name!("preservealpha") => Some(qualname!("", "preserveAlpha")),
1589 local_name!("preserveaspectratio") => Some(qualname!("", "preserveAspectRatio")),
1590 local_name!("primitiveunits") => Some(qualname!("", "primitiveUnits")),
1591 local_name!("refx") => Some(qualname!("", "refX")),
1592 local_name!("refy") => Some(qualname!("", "refY")),
1593 local_name!("repeatcount") => Some(qualname!("", "repeatCount")),
1594 local_name!("repeatdur") => Some(qualname!("", "repeatDur")),
1595 local_name!("requiredextensions") => Some(qualname!("", "requiredExtensions")),
1596 local_name!("requiredfeatures") => Some(qualname!("", "requiredFeatures")),
1597 local_name!("specularconstant") => Some(qualname!("", "specularConstant")),
1598 local_name!("specularexponent") => Some(qualname!("", "specularExponent")),
1599 local_name!("spreadmethod") => Some(qualname!("", "spreadMethod")),
1600 local_name!("startoffset") => Some(qualname!("", "startOffset")),
1601 local_name!("stddeviation") => Some(qualname!("", "stdDeviation")),
1602 local_name!("stitchtiles") => Some(qualname!("", "stitchTiles")),
1603 local_name!("surfacescale") => Some(qualname!("", "surfaceScale")),
1604 local_name!("systemlanguage") => Some(qualname!("", "systemLanguage")),
1605 local_name!("tablevalues") => Some(qualname!("", "tableValues")),
1606 local_name!("targetx") => Some(qualname!("", "targetX")),
1607 local_name!("targety") => Some(qualname!("", "targetY")),
1608 local_name!("textlength") => Some(qualname!("", "textLength")),
1609 local_name!("viewbox") => Some(qualname!("", "viewBox")),
1610 local_name!("viewtarget") => Some(qualname!("", "viewTarget")),
1611 local_name!("xchannelselector") => Some(qualname!("", "xChannelSelector")),
1612 local_name!("ychannelselector") => Some(qualname!("", "yChannelSelector")),
1613 local_name!("zoomandpan") => Some(qualname!("", "zoomAndPan")),
1614 _ => None,
1615 });
1616 }
1617
adjust_mathml_attributes(&mut self, tag: &mut Tag)1618 fn adjust_mathml_attributes(&mut self, tag: &mut Tag) {
1619 self.adjust_attributes(tag, |k| match k {
1620 local_name!("definitionurl") => Some(qualname!("", "definitionURL")),
1621 _ => None,
1622 });
1623 }
1624
adjust_foreign_attributes(&mut self, tag: &mut Tag)1625 fn adjust_foreign_attributes(&mut self, tag: &mut Tag) {
1626 self.adjust_attributes(tag, |k| match k {
1627 local_name!("xlink:actuate") => Some(qualname!("xlink" xlink "actuate")),
1628 local_name!("xlink:arcrole") => Some(qualname!("xlink" xlink "arcrole")),
1629 local_name!("xlink:href") => Some(qualname!("xlink" xlink "href")),
1630 local_name!("xlink:role") => Some(qualname!("xlink" xlink "role")),
1631 local_name!("xlink:show") => Some(qualname!("xlink" xlink "show")),
1632 local_name!("xlink:title") => Some(qualname!("xlink" xlink "title")),
1633 local_name!("xlink:type") => Some(qualname!("xlink" xlink "type")),
1634 local_name!("xml:base") => Some(qualname!("xml" xml "base")),
1635 local_name!("xml:lang") => Some(qualname!("xml" xml "lang")),
1636 local_name!("xml:space") => Some(qualname!("xml" xml "space")),
1637 local_name!("xmlns") => Some(qualname!("" xmlns "xmlns")),
1638 local_name!("xmlns:xlink") => Some(qualname!("xmlns" xmlns "xlink")),
1639 _ => None,
1640 });
1641 }
1642
foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle>1643 fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle> {
1644 let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone();
1645 match current_ns {
1646 ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
1647 ns!(svg) => {
1648 self.adjust_svg_tag_name(&mut tag);
1649 self.adjust_svg_attributes(&mut tag);
1650 },
1651 _ => (),
1652 }
1653 self.adjust_foreign_attributes(&mut tag);
1654 if tag.self_closing {
1655 // FIXME(#118): <script /> in SVG
1656 self.insert_element(NoPush, current_ns, tag.name, tag.attrs);
1657 DoneAckSelfClosing
1658 } else {
1659 self.insert_element(Push, current_ns, tag.name, tag.attrs);
1660 Done
1661 }
1662 }
1663
unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle>1664 fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle> {
1665 self.unexpected(&tag);
1666 if self.is_fragment() {
1667 self.foreign_start_tag(tag)
1668 } else {
1669 self.pop();
1670 while !self.current_node_in(|n| {
1671 *n.ns == ns!(html) ||
1672 mathml_text_integration_point(n) ||
1673 svg_html_integration_point(n)
1674 }) {
1675 self.pop();
1676 }
1677 ReprocessForeign(TagToken(tag))
1678 }
1679 }
1680 }
1681
1682 #[cfg(test)]
1683 #[allow(non_snake_case)]
1684 mod test {
1685 use markup5ever::interface::{AppendNode, AppendText, NodeOrText};
1686 use markup5ever::interface::{ElementFlags, Tracer, TreeSink};
1687 use markup5ever::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
1688
1689 use super::types::*;
1690
1691 use tendril::stream::{TendrilSink, Utf8LossyDecoder};
1692 use tendril::StrTendril;
1693 use ExpandedName;
1694 use QualName;
1695
1696 use tokenizer;
1697 use tokenizer::states as tok_state;
1698 use tokenizer::{Doctype, StartTag, Tag, TokenSink};
1699 use tokenizer::{Tokenizer, TokenizerOpts};
1700
1701 use util::str::is_ascii_whitespace;
1702
1703 use std::borrow::Cow;
1704 use std::borrow::Cow::Borrowed;
1705 use std::collections::VecDeque;
1706 use std::default::Default;
1707 use std::mem::replace;
1708
1709 use super::{TreeBuilder, TreeBuilderOpts};
1710 use driver::*;
1711 use markup5ever::Attribute;
1712 use rcdom::{Handle, Node, NodeData, RcDom};
1713
1714 pub struct LineCountingDOM {
1715 pub line_vec: Vec<(QualName, u64)>,
1716 pub current_line: u64,
1717 pub rcdom: RcDom,
1718 }
1719
1720 impl TreeSink for LineCountingDOM {
1721 type Output = Self;
1722
finish(self) -> Self1723 fn finish(self) -> Self {
1724 self
1725 }
1726
1727 type Handle = Handle;
1728
parse_error(&mut self, msg: Cow<'static, str>)1729 fn parse_error(&mut self, msg: Cow<'static, str>) {
1730 self.rcdom.parse_error(msg);
1731 }
1732
get_document(&mut self) -> Handle1733 fn get_document(&mut self) -> Handle {
1734 self.rcdom.get_document()
1735 }
1736
get_template_contents(&mut self, target: &Handle) -> Handle1737 fn get_template_contents(&mut self, target: &Handle) -> Handle {
1738 self.rcdom.get_template_contents(target)
1739 }
1740
set_quirks_mode(&mut self, mode: QuirksMode)1741 fn set_quirks_mode(&mut self, mode: QuirksMode) {
1742 self.rcdom.set_quirks_mode(mode)
1743 }
1744
same_node(&self, x: &Handle, y: &Handle) -> bool1745 fn same_node(&self, x: &Handle, y: &Handle) -> bool {
1746 self.rcdom.same_node(x, y)
1747 }
1748
elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a>1749 fn elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a> {
1750 self.rcdom.elem_name(target)
1751 }
1752
create_element( &mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags, ) -> Handle1753 fn create_element(
1754 &mut self,
1755 name: QualName,
1756 attrs: Vec<Attribute>,
1757 flags: ElementFlags,
1758 ) -> Handle {
1759 self.line_vec.push((name.clone(), self.current_line));
1760 self.rcdom.create_element(name, attrs, flags)
1761 }
1762
create_comment(&mut self, text: StrTendril) -> Handle1763 fn create_comment(&mut self, text: StrTendril) -> Handle {
1764 self.rcdom.create_comment(text)
1765 }
1766
create_pi(&mut self, target: StrTendril, content: StrTendril) -> Handle1767 fn create_pi(&mut self, target: StrTendril, content: StrTendril) -> Handle {
1768 self.rcdom.create_pi(target, content)
1769 }
1770
append(&mut self, parent: &Handle, child: NodeOrText<Handle>)1771 fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
1772 self.rcdom.append(parent, child)
1773 }
1774
append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>)1775 fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
1776 self.rcdom.append_before_sibling(sibling, child)
1777 }
1778
append_based_on_parent_node( &mut self, element: &Handle, prev_element: &Handle, child: NodeOrText<Handle>, )1779 fn append_based_on_parent_node(
1780 &mut self,
1781 element: &Handle,
1782 prev_element: &Handle,
1783 child: NodeOrText<Handle>,
1784 ) {
1785 self.rcdom
1786 .append_based_on_parent_node(element, prev_element, child)
1787 }
1788
append_doctype_to_document( &mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, )1789 fn append_doctype_to_document(
1790 &mut self,
1791 name: StrTendril,
1792 public_id: StrTendril,
1793 system_id: StrTendril,
1794 ) {
1795 self.rcdom
1796 .append_doctype_to_document(name, public_id, system_id);
1797 }
1798
add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>)1799 fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
1800 self.rcdom.add_attrs_if_missing(target, attrs);
1801 }
1802
remove_from_parent(&mut self, target: &Handle)1803 fn remove_from_parent(&mut self, target: &Handle) {
1804 self.rcdom.remove_from_parent(target);
1805 }
1806
reparent_children(&mut self, node: &Handle, new_parent: &Handle)1807 fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
1808 self.rcdom.reparent_children(node, new_parent);
1809 }
1810
mark_script_already_started(&mut self, target: &Handle)1811 fn mark_script_already_started(&mut self, target: &Handle) {
1812 self.rcdom.mark_script_already_started(target);
1813 }
1814
set_current_line(&mut self, line_number: u64)1815 fn set_current_line(&mut self, line_number: u64) {
1816 self.current_line = line_number;
1817 }
1818 }
1819
/// Feeds four chunks (three of them newline-terminated) to a document parser
/// backed by `LineCountingDOM` and checks the line recorded for each created
/// element.
#[test]
fn check_four_lines() {
    // Input
    let sink = LineCountingDOM {
        line_vec: vec![],
        current_line: 1,
        rcdom: RcDom::default(),
    };
    let mut parser = parse_document(sink, ParseOpts::default());
    for chunk in &["<a>\n", "</a>\n", "<b>\n", "</b>"] {
        parser.process(StrTendril::from(*chunk));
    }

    // Actual Output
    let actual = parser.finish();

    // Expected Output
    let expected: Vec<(QualName, u64)> = vec![
        (QualName::new(None, ns!(html), local_name!("html")), 1),
        (QualName::new(None, ns!(html), local_name!("head")), 1),
        (QualName::new(None, ns!(html), local_name!("body")), 1),
        (QualName::new(None, ns!(html), local_name!("a")), 1),
        (QualName::new(None, ns!(html), local_name!("b")), 3),
    ];

    // Assertion
    assert_eq!(actual.line_vec, expected);
}
1847 }
1848