1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8	"errors"
9	"fmt"
10	"io"
11	"strings"
12
13	a "golang.org/x/net/html/atom"
14)
15
// A parser implements the HTML5 parsing algorithm:
// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
//
// It carries the tree-construction state: the token being processed, the
// document under construction, the element stacks and the insertion mode
// that decides how the next token is handled.
type parser struct {
	// tokenizer provides the tokens for the parser.
	tokenizer *Tokenizer
	// tok is the most recently read token.
	tok Token
	// Self-closing tags like <hr/> are treated as start tags, except that
	// hasSelfClosingToken is set while they are being processed.
	// acknowledgeSelfClosingTag clears it again.
	hasSelfClosingToken bool
	// doc is the document root element. It is also what top returns when
	// the stack of open elements is empty.
	doc *Node
	// The stack of open elements (section 12.2.4.2) and active formatting
	// elements (section 12.2.4.3).
	oe, afe nodeStack
	// Element pointers (section 12.2.4.4).
	head, form *Node
	// Other parsing state flags (section 12.2.4.5).
	// scripting selects raw-text handling of <noscript>; framesetOK is
	// consulted when a <frameset> start tag is seen in the body.
	scripting, framesetOK bool
	// The stack of template insertion modes
	templateStack insertionModeStack
	// im is the current insertion mode.
	im insertionMode
	// originalIM is the insertion mode to go back to after completing a text
	// or inTableText insertion mode.
	originalIM insertionMode
	// fosterParenting is whether new elements should be inserted according to
	// the foster parenting rules (section 12.2.6.1).
	fosterParenting bool
	// quirks is whether the parser is operating in "quirks mode."
	quirks bool
	// fragment is whether the parser is parsing an HTML fragment.
	fragment bool
	// context is the context element when parsing an HTML fragment
	// (section 12.4).
	context *Node
}
53
54func (p *parser) top() *Node {
55	if n := p.oe.top(); n != nil {
56		return n
57	}
58	return p.doc
59}
60
// Stop tags for use in popUntil. These come from section 12.2.4.2.
var (
	// defaultScopeStopTags is keyed by element namespace (the empty string
	// is the key for elements whose Namespace field is empty). For the
	// default, list item and button scopes, indexOfElementInScope stops
	// searching when it reaches an element whose tag is listed under that
	// element's namespace.
	defaultScopeStopTags = map[string][]a.Atom{
		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
		"svg":  {a.Desc, a.ForeignObject, a.Title},
	}
)
69
// A scope selects which set of stop tags bounds a walk down the stack of
// open elements; see indexOfElementInScope and clearStackToContext.
type scope int

const (
	// defaultScope uses only the defaultScopeStopTags table.
	defaultScope scope = iota
	// listItemScope additionally stops at ol and ul.
	listItemScope
	// buttonScope additionally stops at button.
	buttonScope
	// tableScope stops at html, table and template.
	tableScope
	// tableRowScope is only handled by clearStackToContext, where it stops
	// at html, tr and template.
	tableRowScope
	// tableBodyScope is only handled by clearStackToContext, where it stops
	// at html, tbody, tfoot, thead and template.
	tableBodyScope
	// selectScope stops at every element other than optgroup and option.
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101		p.oe = p.oe[:i]
102		return true
103	}
104	return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111	for i := len(p.oe) - 1; i >= 0; i-- {
112		tagAtom := p.oe[i].DataAtom
113		if p.oe[i].Namespace == "" {
114			for _, t := range matchTags {
115				if t == tagAtom {
116					return i
117				}
118			}
119			switch s {
120			case defaultScope:
121				// No-op.
122			case listItemScope:
123				if tagAtom == a.Ol || tagAtom == a.Ul {
124					return -1
125				}
126			case buttonScope:
127				if tagAtom == a.Button {
128					return -1
129				}
130			case tableScope:
131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132					return -1
133				}
134			case selectScope:
135				if tagAtom != a.Optgroup && tagAtom != a.Option {
136					return -1
137				}
138			default:
139				panic("unreachable")
140			}
141		}
142		switch s {
143		case defaultScope, listItemScope, buttonScope:
144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145				if t == tagAtom {
146					return -1
147				}
148			}
149		}
150	}
151	return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157	return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163	for i := len(p.oe) - 1; i >= 0; i-- {
164		tagAtom := p.oe[i].DataAtom
165		switch s {
166		case tableScope:
167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168				p.oe = p.oe[:i+1]
169				return
170			}
171		case tableRowScope:
172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173				p.oe = p.oe[:i+1]
174				return
175			}
176		case tableBodyScope:
177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178				p.oe = p.oe[:i+1]
179				return
180			}
181		default:
182			panic("unreachable")
183		}
184	}
185}
186
// parseGenericRawTextElement implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
//
// It inserts an element for the current token, records the current insertion
// mode in originalIM and switches to the text insertion mode. originalIM is
// assigned directly, so any previously recorded value is overwritten.
//
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
// officially, need to make tokenizer consider both states.
func (p *parser) parseGenericRawTextElement() {
	p.addElement()
	p.originalIM = p.im
	p.im = textIM
}
197
198// generateImpliedEndTags pops nodes off the stack of open elements as long as
199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
200// If exceptions are specified, nodes with that name will not be popped off.
201func (p *parser) generateImpliedEndTags(exceptions ...string) {
202	var i int
203loop:
204	for i = len(p.oe) - 1; i >= 0; i-- {
205		n := p.oe[i]
206		if n.Type != ElementNode {
207			break
208		}
209		switch n.DataAtom {
210		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211			for _, except := range exceptions {
212				if n.Data == except {
213					break loop
214				}
215			}
216			continue
217		}
218		break
219	}
220
221	p.oe = p.oe[:i+1]
222}
223
224// addChild adds a child node n to the top element, and pushes n onto the stack
225// of open elements if it is an element node.
226func (p *parser) addChild(n *Node) {
227	if p.shouldFosterParent() {
228		p.fosterParent(n)
229	} else {
230		p.top().AppendChild(n)
231	}
232
233	if n.Type == ElementNode {
234		p.oe = append(p.oe, n)
235	}
236}
237
238// shouldFosterParent returns whether the next node to be added should be
239// foster parented.
240func (p *parser) shouldFosterParent() bool {
241	if p.fosterParenting {
242		switch p.top().DataAtom {
243		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244			return true
245		}
246	}
247	return false
248}
249
250// fosterParent adds a child node according to the foster parenting rules.
251// Section 12.2.6.1, "foster parenting".
252func (p *parser) fosterParent(n *Node) {
253	var table, parent, prev, template *Node
254	var i int
255	for i = len(p.oe) - 1; i >= 0; i-- {
256		if p.oe[i].DataAtom == a.Table {
257			table = p.oe[i]
258			break
259		}
260	}
261
262	var j int
263	for j = len(p.oe) - 1; j >= 0; j-- {
264		if p.oe[j].DataAtom == a.Template {
265			template = p.oe[j]
266			break
267		}
268	}
269
270	if template != nil && (table == nil || j > i) {
271		template.AppendChild(n)
272		return
273	}
274
275	if table == nil {
276		// The foster parent is the html element.
277		parent = p.oe[0]
278	} else {
279		parent = table.Parent
280	}
281	if parent == nil {
282		parent = p.oe[i-1]
283	}
284
285	if table != nil {
286		prev = table.PrevSibling
287	} else {
288		prev = parent.LastChild
289	}
290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291		prev.Data += n.Data
292		return
293	}
294
295	parent.InsertBefore(n, table)
296}
297
298// addText adds text to the preceding node if it is a text node, or else it
299// calls addChild with a new text node.
300func (p *parser) addText(text string) {
301	if text == "" {
302		return
303	}
304
305	if p.shouldFosterParent() {
306		p.fosterParent(&Node{
307			Type: TextNode,
308			Data: text,
309		})
310		return
311	}
312
313	t := p.top()
314	if n := t.LastChild; n != nil && n.Type == TextNode {
315		n.Data += text
316		return
317	}
318	p.addChild(&Node{
319		Type: TextNode,
320		Data: text,
321	})
322}
323
324// addElement adds a child element based on the current token.
325func (p *parser) addElement() {
326	p.addChild(&Node{
327		Type:     ElementNode,
328		DataAtom: p.tok.DataAtom,
329		Data:     p.tok.Data,
330		Attr:     p.tok.Attr,
331	})
332}
333
334// Section 12.2.4.3.
335func (p *parser) addFormattingElement() {
336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337	p.addElement()
338
339	// Implement the Noah's Ark clause, but with three per family instead of two.
340	identicalElements := 0
341findIdenticalElements:
342	for i := len(p.afe) - 1; i >= 0; i-- {
343		n := p.afe[i]
344		if n.Type == scopeMarkerNode {
345			break
346		}
347		if n.Type != ElementNode {
348			continue
349		}
350		if n.Namespace != "" {
351			continue
352		}
353		if n.DataAtom != tagAtom {
354			continue
355		}
356		if len(n.Attr) != len(attr) {
357			continue
358		}
359	compareAttributes:
360		for _, t0 := range n.Attr {
361			for _, t1 := range attr {
362				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363					// Found a match for this attribute, continue with the next attribute.
364					continue compareAttributes
365				}
366			}
367			// If we get here, there is no attribute that matches a.
368			// Therefore the element is not identical to the new one.
369			continue findIdenticalElements
370		}
371
372		identicalElements++
373		if identicalElements >= 3 {
374			p.afe.remove(n)
375		}
376	}
377
378	p.afe = append(p.afe, p.top())
379}
380
381// Section 12.2.4.3.
382func (p *parser) clearActiveFormattingElements() {
383	for {
384		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
385			return
386		}
387	}
388}
389
// reconstructActiveFormattingElements re-opens formatting elements that are
// in the list of active formatting elements but no longer on the stack of
// open elements. Each such entry after the last marker (or after the last
// entry that is still open) is cloned, inserted with addChild, and replaced
// in the list by its clone.
// Section 12.2.4.3.
func (p *parser) reconstructActiveFormattingElements() {
	n := p.afe.top()
	if n == nil {
		// Nothing to reconstruct.
		return
	}
	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
		// The last entry is a marker or is still open.
		return
	}
	// Rewind: move i back until it points at a marker or an entry that is
	// still open, or to -1 when the start of the list is reached.
	i := len(p.afe) - 1
	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
		if i == 0 {
			i = -1
			break
		}
		i--
		n = p.afe[i]
	}
	// Advance: clone every entry after position i, up to the end of the list.
	for {
		i++
		clone := p.afe[i].clone()
		p.addChild(clone)
		p.afe[i] = clone
		if i == len(p.afe)-1 {
			break
		}
	}
}
418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, marking the
// self-closing flag of the token being processed as handled.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
423
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed. The insertion modes in this
// file return false after changing parser state (for example p.im) so that
// the same token can be processed again.
type insertionMode func(*parser) bool
429
430// setOriginalIM sets the insertion mode to return to after completing a text or
431// inTableText insertion mode.
432// Section 12.2.4.1, "using the rules for".
433func (p *parser) setOriginalIM() {
434	if p.originalIM != nil {
435		panic("html: bad parser state: originalIM was set twice")
436	}
437	p.originalIM = p.im
438}
439
// resetInsertionMode picks the insertion mode that matches the current
// contents of the stack of open elements. It walks the stack from the top
// down and stops at the first element that determines a mode.
// Section 12.2.4.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last is true for the bottom-most element of the stack. When a
		// fragment context element is set, it stands in for that element.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Look at the elements below the select: a template means
				// plain "in select", a table means "in select in table".
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			// A template element in a non-empty namespace is skipped.
			if n.Namespace != "" {
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			// No mode for this element: fall back to "in body" at the
			// bottom of the stack, otherwise keep walking down.
			if last {
				p.im = inBodyIM
				return
			}
			continue
		}
		return
	}
}
511
// whitespace is the cutset (space, tab, CR, LF, form feed) that the insertion
// modes pass to strings.TrimLeft to strip leading whitespace from text tokens.
const whitespace = " \t\r\n\f"
513
514// Section 12.2.6.4.1.
515func initialIM(p *parser) bool {
516	switch p.tok.Type {
517	case TextToken:
518		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519		if len(p.tok.Data) == 0 {
520			// It was all whitespace, so ignore it.
521			return true
522		}
523	case CommentToken:
524		p.doc.AppendChild(&Node{
525			Type: CommentNode,
526			Data: p.tok.Data,
527		})
528		return true
529	case DoctypeToken:
530		n, quirks := parseDoctype(p.tok.Data)
531		p.doc.AppendChild(n)
532		p.quirks = quirks
533		p.im = beforeHTMLIM
534		return true
535	}
536	p.quirks = true
537	p.im = beforeHTMLIM
538	return false
539}
540
541// Section 12.2.6.4.2.
542func beforeHTMLIM(p *parser) bool {
543	switch p.tok.Type {
544	case DoctypeToken:
545		// Ignore the token.
546		return true
547	case TextToken:
548		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549		if len(p.tok.Data) == 0 {
550			// It was all whitespace, so ignore it.
551			return true
552		}
553	case StartTagToken:
554		if p.tok.DataAtom == a.Html {
555			p.addElement()
556			p.im = beforeHeadIM
557			return true
558		}
559	case EndTagToken:
560		switch p.tok.DataAtom {
561		case a.Head, a.Body, a.Html, a.Br:
562			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563			return false
564		default:
565			// Ignore the token.
566			return true
567		}
568	case CommentToken:
569		p.doc.AppendChild(&Node{
570			Type: CommentNode,
571			Data: p.tok.Data,
572		})
573		return true
574	}
575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576	return false
577}
578
579// Section 12.2.6.4.3.
580func beforeHeadIM(p *parser) bool {
581	switch p.tok.Type {
582	case TextToken:
583		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584		if len(p.tok.Data) == 0 {
585			// It was all whitespace, so ignore it.
586			return true
587		}
588	case StartTagToken:
589		switch p.tok.DataAtom {
590		case a.Head:
591			p.addElement()
592			p.head = p.top()
593			p.im = inHeadIM
594			return true
595		case a.Html:
596			return inBodyIM(p)
597		}
598	case EndTagToken:
599		switch p.tok.DataAtom {
600		case a.Head, a.Body, a.Html, a.Br:
601			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602			return false
603		default:
604			// Ignore the token.
605			return true
606		}
607	case CommentToken:
608		p.addChild(&Node{
609			Type: CommentNode,
610			Data: p.tok.Data,
611		})
612		return true
613	case DoctypeToken:
614		// Ignore the token.
615		return true
616	}
617
618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619	return false
620}
621
622// Section 12.2.6.4.4.
623func inHeadIM(p *parser) bool {
624	switch p.tok.Type {
625	case TextToken:
626		s := strings.TrimLeft(p.tok.Data, whitespace)
627		if len(s) < len(p.tok.Data) {
628			// Add the initial whitespace to the current node.
629			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
630			if s == "" {
631				return true
632			}
633			p.tok.Data = s
634		}
635	case StartTagToken:
636		switch p.tok.DataAtom {
637		case a.Html:
638			return inBodyIM(p)
639		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
640			p.addElement()
641			p.oe.pop()
642			p.acknowledgeSelfClosingTag()
643			return true
644		case a.Noscript:
645			if p.scripting {
646				p.parseGenericRawTextElement()
647				return true
648			}
649			p.addElement()
650			p.im = inHeadNoscriptIM
651			// Don't let the tokenizer go into raw text mode when scripting is disabled.
652			p.tokenizer.NextIsNotRawText()
653			return true
654		case a.Script, a.Title:
655			p.addElement()
656			p.setOriginalIM()
657			p.im = textIM
658			return true
659		case a.Noframes, a.Style:
660			p.parseGenericRawTextElement()
661			return true
662		case a.Head:
663			// Ignore the token.
664			return true
665		case a.Template:
666			p.addElement()
667			p.afe = append(p.afe, &scopeMarker)
668			p.framesetOK = false
669			p.im = inTemplateIM
670			p.templateStack = append(p.templateStack, inTemplateIM)
671			return true
672		}
673	case EndTagToken:
674		switch p.tok.DataAtom {
675		case a.Head:
676			p.oe.pop()
677			p.im = afterHeadIM
678			return true
679		case a.Body, a.Html, a.Br:
680			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
681			return false
682		case a.Template:
683			if !p.oe.contains(a.Template) {
684				return true
685			}
686			// TODO: remove this divergence from the HTML5 spec.
687			//
688			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
689			p.generateImpliedEndTags()
690			for i := len(p.oe) - 1; i >= 0; i-- {
691				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
692					p.oe = p.oe[:i]
693					break
694				}
695			}
696			p.clearActiveFormattingElements()
697			p.templateStack.pop()
698			p.resetInsertionMode()
699			return true
700		default:
701			// Ignore the token.
702			return true
703		}
704	case CommentToken:
705		p.addChild(&Node{
706			Type: CommentNode,
707			Data: p.tok.Data,
708		})
709		return true
710	case DoctypeToken:
711		// Ignore the token.
712		return true
713	}
714
715	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
716	return false
717}
718
719// 12.2.6.4.5.
720func inHeadNoscriptIM(p *parser) bool {
721	switch p.tok.Type {
722	case DoctypeToken:
723		// Ignore the token.
724		return true
725	case StartTagToken:
726		switch p.tok.DataAtom {
727		case a.Html:
728			return inBodyIM(p)
729		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
730			return inHeadIM(p)
731		case a.Head:
732			// Ignore the token.
733			return true
734		case a.Noscript:
735			// Don't let the tokenizer go into raw text mode even when a <noscript>
736			// tag is in "in head noscript" insertion mode.
737			p.tokenizer.NextIsNotRawText()
738			// Ignore the token.
739			return true
740		}
741	case EndTagToken:
742		switch p.tok.DataAtom {
743		case a.Noscript, a.Br:
744		default:
745			// Ignore the token.
746			return true
747		}
748	case TextToken:
749		s := strings.TrimLeft(p.tok.Data, whitespace)
750		if len(s) == 0 {
751			// It was all whitespace.
752			return inHeadIM(p)
753		}
754	case CommentToken:
755		return inHeadIM(p)
756	}
757	p.oe.pop()
758	if p.top().DataAtom != a.Head {
759		panic("html: the new current node will be a head element.")
760	}
761	p.im = inHeadIM
762	if p.tok.DataAtom == a.Noscript {
763		return true
764	}
765	return false
766}
767
768// Section 12.2.6.4.6.
769func afterHeadIM(p *parser) bool {
770	switch p.tok.Type {
771	case TextToken:
772		s := strings.TrimLeft(p.tok.Data, whitespace)
773		if len(s) < len(p.tok.Data) {
774			// Add the initial whitespace to the current node.
775			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
776			if s == "" {
777				return true
778			}
779			p.tok.Data = s
780		}
781	case StartTagToken:
782		switch p.tok.DataAtom {
783		case a.Html:
784			return inBodyIM(p)
785		case a.Body:
786			p.addElement()
787			p.framesetOK = false
788			p.im = inBodyIM
789			return true
790		case a.Frameset:
791			p.addElement()
792			p.im = inFramesetIM
793			return true
794		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
795			p.oe = append(p.oe, p.head)
796			defer p.oe.remove(p.head)
797			return inHeadIM(p)
798		case a.Head:
799			// Ignore the token.
800			return true
801		}
802	case EndTagToken:
803		switch p.tok.DataAtom {
804		case a.Body, a.Html, a.Br:
805			// Drop down to creating an implied <body> tag.
806		case a.Template:
807			return inHeadIM(p)
808		default:
809			// Ignore the token.
810			return true
811		}
812	case CommentToken:
813		p.addChild(&Node{
814			Type: CommentNode,
815			Data: p.tok.Data,
816		})
817		return true
818	case DoctypeToken:
819		// Ignore the token.
820		return true
821	}
822
823	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
824	p.framesetOK = true
825	return false
826}
827
828// copyAttributes copies attributes of src not found on dst to dst.
829func copyAttributes(dst *Node, src Token) {
830	if len(src.Attr) == 0 {
831		return
832	}
833	attr := map[string]string{}
834	for _, t := range dst.Attr {
835		attr[t.Key] = t.Val
836	}
837	for _, t := range src.Attr {
838		if _, ok := attr[t.Key]; !ok {
839			dst.Attr = append(dst.Attr, t)
840			attr[t.Key] = t.Val
841		}
842	}
843}
844
845// Section 12.2.6.4.7.
846func inBodyIM(p *parser) bool {
847	switch p.tok.Type {
848	case TextToken:
849		d := p.tok.Data
850		switch n := p.oe.top(); n.DataAtom {
851		case a.Pre, a.Listing:
852			if n.FirstChild == nil {
853				// Ignore a newline at the start of a <pre> block.
854				if d != "" && d[0] == '\r' {
855					d = d[1:]
856				}
857				if d != "" && d[0] == '\n' {
858					d = d[1:]
859				}
860			}
861		}
862		d = strings.Replace(d, "\x00", "", -1)
863		if d == "" {
864			return true
865		}
866		p.reconstructActiveFormattingElements()
867		p.addText(d)
868		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
869			// There were non-whitespace characters inserted.
870			p.framesetOK = false
871		}
872	case StartTagToken:
873		switch p.tok.DataAtom {
874		case a.Html:
875			if p.oe.contains(a.Template) {
876				return true
877			}
878			copyAttributes(p.oe[0], p.tok)
879		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
880			return inHeadIM(p)
881		case a.Body:
882			if p.oe.contains(a.Template) {
883				return true
884			}
885			if len(p.oe) >= 2 {
886				body := p.oe[1]
887				if body.Type == ElementNode && body.DataAtom == a.Body {
888					p.framesetOK = false
889					copyAttributes(body, p.tok)
890				}
891			}
892		case a.Frameset:
893			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
894				// Ignore the token.
895				return true
896			}
897			body := p.oe[1]
898			if body.Parent != nil {
899				body.Parent.RemoveChild(body)
900			}
901			p.oe = p.oe[:1]
902			p.addElement()
903			p.im = inFramesetIM
904			return true
905		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
906			p.popUntil(buttonScope, a.P)
907			p.addElement()
908		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
909			p.popUntil(buttonScope, a.P)
910			switch n := p.top(); n.DataAtom {
911			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
912				p.oe.pop()
913			}
914			p.addElement()
915		case a.Pre, a.Listing:
916			p.popUntil(buttonScope, a.P)
917			p.addElement()
918			// The newline, if any, will be dealt with by the TextToken case.
919			p.framesetOK = false
920		case a.Form:
921			if p.form != nil && !p.oe.contains(a.Template) {
922				// Ignore the token
923				return true
924			}
925			p.popUntil(buttonScope, a.P)
926			p.addElement()
927			if !p.oe.contains(a.Template) {
928				p.form = p.top()
929			}
930		case a.Li:
931			p.framesetOK = false
932			for i := len(p.oe) - 1; i >= 0; i-- {
933				node := p.oe[i]
934				switch node.DataAtom {
935				case a.Li:
936					p.oe = p.oe[:i]
937				case a.Address, a.Div, a.P:
938					continue
939				default:
940					if !isSpecialElement(node) {
941						continue
942					}
943				}
944				break
945			}
946			p.popUntil(buttonScope, a.P)
947			p.addElement()
948		case a.Dd, a.Dt:
949			p.framesetOK = false
950			for i := len(p.oe) - 1; i >= 0; i-- {
951				node := p.oe[i]
952				switch node.DataAtom {
953				case a.Dd, a.Dt:
954					p.oe = p.oe[:i]
955				case a.Address, a.Div, a.P:
956					continue
957				default:
958					if !isSpecialElement(node) {
959						continue
960					}
961				}
962				break
963			}
964			p.popUntil(buttonScope, a.P)
965			p.addElement()
966		case a.Plaintext:
967			p.popUntil(buttonScope, a.P)
968			p.addElement()
969		case a.Button:
970			p.popUntil(defaultScope, a.Button)
971			p.reconstructActiveFormattingElements()
972			p.addElement()
973			p.framesetOK = false
974		case a.A:
975			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
976				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
977					p.inBodyEndTagFormatting(a.A, "a")
978					p.oe.remove(n)
979					p.afe.remove(n)
980					break
981				}
982			}
983			p.reconstructActiveFormattingElements()
984			p.addFormattingElement()
985		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
986			p.reconstructActiveFormattingElements()
987			p.addFormattingElement()
988		case a.Nobr:
989			p.reconstructActiveFormattingElements()
990			if p.elementInScope(defaultScope, a.Nobr) {
991				p.inBodyEndTagFormatting(a.Nobr, "nobr")
992				p.reconstructActiveFormattingElements()
993			}
994			p.addFormattingElement()
995		case a.Applet, a.Marquee, a.Object:
996			p.reconstructActiveFormattingElements()
997			p.addElement()
998			p.afe = append(p.afe, &scopeMarker)
999			p.framesetOK = false
1000		case a.Table:
1001			if !p.quirks {
1002				p.popUntil(buttonScope, a.P)
1003			}
1004			p.addElement()
1005			p.framesetOK = false
1006			p.im = inTableIM
1007			return true
1008		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1009			p.reconstructActiveFormattingElements()
1010			p.addElement()
1011			p.oe.pop()
1012			p.acknowledgeSelfClosingTag()
1013			if p.tok.DataAtom == a.Input {
1014				for _, t := range p.tok.Attr {
1015					if t.Key == "type" {
1016						if strings.ToLower(t.Val) == "hidden" {
1017							// Skip setting framesetOK = false
1018							return true
1019						}
1020					}
1021				}
1022			}
1023			p.framesetOK = false
1024		case a.Param, a.Source, a.Track:
1025			p.addElement()
1026			p.oe.pop()
1027			p.acknowledgeSelfClosingTag()
1028		case a.Hr:
1029			p.popUntil(buttonScope, a.P)
1030			p.addElement()
1031			p.oe.pop()
1032			p.acknowledgeSelfClosingTag()
1033			p.framesetOK = false
1034		case a.Image:
1035			p.tok.DataAtom = a.Img
1036			p.tok.Data = a.Img.String()
1037			return false
1038		case a.Textarea:
1039			p.addElement()
1040			p.setOriginalIM()
1041			p.framesetOK = false
1042			p.im = textIM
1043		case a.Xmp:
1044			p.popUntil(buttonScope, a.P)
1045			p.reconstructActiveFormattingElements()
1046			p.framesetOK = false
1047			p.parseGenericRawTextElement()
1048		case a.Iframe:
1049			p.framesetOK = false
1050			p.parseGenericRawTextElement()
1051		case a.Noembed:
1052			p.parseGenericRawTextElement()
1053		case a.Noscript:
1054			if p.scripting {
1055				p.parseGenericRawTextElement()
1056				return true
1057			}
1058			p.reconstructActiveFormattingElements()
1059			p.addElement()
1060			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1061			p.tokenizer.NextIsNotRawText()
1062		case a.Select:
1063			p.reconstructActiveFormattingElements()
1064			p.addElement()
1065			p.framesetOK = false
1066			p.im = inSelectIM
1067			return true
1068		case a.Optgroup, a.Option:
1069			if p.top().DataAtom == a.Option {
1070				p.oe.pop()
1071			}
1072			p.reconstructActiveFormattingElements()
1073			p.addElement()
1074		case a.Rb, a.Rtc:
1075			if p.elementInScope(defaultScope, a.Ruby) {
1076				p.generateImpliedEndTags()
1077			}
1078			p.addElement()
1079		case a.Rp, a.Rt:
1080			if p.elementInScope(defaultScope, a.Ruby) {
1081				p.generateImpliedEndTags("rtc")
1082			}
1083			p.addElement()
1084		case a.Math, a.Svg:
1085			p.reconstructActiveFormattingElements()
1086			if p.tok.DataAtom == a.Math {
1087				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1088			} else {
1089				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1090			}
1091			adjustForeignAttributes(p.tok.Attr)
1092			p.addElement()
1093			p.top().Namespace = p.tok.Data
1094			if p.hasSelfClosingToken {
1095				p.oe.pop()
1096				p.acknowledgeSelfClosingTag()
1097			}
1098			return true
1099		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1100			// Ignore the token.
1101		default:
1102			p.reconstructActiveFormattingElements()
1103			p.addElement()
1104		}
1105	case EndTagToken:
1106		switch p.tok.DataAtom {
1107		case a.Body:
1108			if p.elementInScope(defaultScope, a.Body) {
1109				p.im = afterBodyIM
1110			}
1111		case a.Html:
1112			if p.elementInScope(defaultScope, a.Body) {
1113				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1114				return false
1115			}
1116			return true
1117		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1118			p.popUntil(defaultScope, p.tok.DataAtom)
1119		case a.Form:
1120			if p.oe.contains(a.Template) {
1121				i := p.indexOfElementInScope(defaultScope, a.Form)
1122				if i == -1 {
1123					// Ignore the token.
1124					return true
1125				}
1126				p.generateImpliedEndTags()
1127				if p.oe[i].DataAtom != a.Form {
1128					// Ignore the token.
1129					return true
1130				}
1131				p.popUntil(defaultScope, a.Form)
1132			} else {
1133				node := p.form
1134				p.form = nil
1135				i := p.indexOfElementInScope(defaultScope, a.Form)
1136				if node == nil || i == -1 || p.oe[i] != node {
1137					// Ignore the token.
1138					return true
1139				}
1140				p.generateImpliedEndTags()
1141				p.oe.remove(node)
1142			}
1143		case a.P:
1144			if !p.elementInScope(buttonScope, a.P) {
1145				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1146			}
1147			p.popUntil(buttonScope, a.P)
1148		case a.Li:
1149			p.popUntil(listItemScope, a.Li)
1150		case a.Dd, a.Dt:
1151			p.popUntil(defaultScope, p.tok.DataAtom)
1152		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1153			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1154		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1155			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1156		case a.Applet, a.Marquee, a.Object:
1157			if p.popUntil(defaultScope, p.tok.DataAtom) {
1158				p.clearActiveFormattingElements()
1159			}
1160		case a.Br:
1161			p.tok.Type = StartTagToken
1162			return false
1163		case a.Template:
1164			return inHeadIM(p)
1165		default:
1166			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1167		}
1168	case CommentToken:
1169		p.addChild(&Node{
1170			Type: CommentNode,
1171			Data: p.tok.Data,
1172		})
1173	case ErrorToken:
1174		// TODO: remove this divergence from the HTML5 spec.
1175		if len(p.templateStack) > 0 {
1176			p.im = inTemplateIM
1177			return false
1178		}
1179		for _, e := range p.oe {
1180			switch e.DataAtom {
1181			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1182				a.Thead, a.Tr, a.Body, a.Html:
1183			default:
1184				return true
1185			}
1186		}
1187	}
1188
1189	return true
1190}
1191
// inBodyEndTagFormatting handles an end tag for a formatting element (such
// as </a>, </b> or </i>) while in the "in body" insertion mode. tagAtom and
// tagName identify the end tag being processed.
//
// It mutates the stack of open elements (p.oe), the list of active
// formatting elements (p.afe) and the node tree. When no matching formatting
// element is found before a scope marker, it falls back to
// inBodyEndTagOther.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2
	// Fast path: the current node has the requested tag name and is not an
	// active formatting element, so it is simply popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop.
	// The loop body runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element.
		// Search the active formatting elements from the most recent entry
		// backwards, stopping at the first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block.
		// The furthest block is the first special element found when walking
		// the stack of open elements upwards from the formatting element.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop elements up to and including the
			// formatting element, then drop it from the active formatting
			// elements.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry immediately below the
		// formatting element; bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step. 14.3.
			// Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				// NOTE(review): node stays in the stack of open elements here;
				// the spec's following step appears to remove it as well once
				// it is no longer an active formatting element. Confirm that
				// this early continue is intentional.
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7.
			// Replace node with a clone in both the active formatting elements
			// and the stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8.
			// On the first reparenting, move the bookmark to just after the
			// new node's entry in the active formatting elements.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9.
			// Detach lastNode from its current parent, if any, and append it
			// to node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements.
		// The clone takes the slot immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1344
1345// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1346// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1347// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1348func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1349	for i := len(p.oe) - 1; i >= 0; i-- {
1350		// Two element nodes have the same tag if they have the same Data (a
1351		// string-typed field). As an optimization, for common HTML tags, each
1352		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1353		// field), since integer comparison is faster than string comparison.
1354		// Uncommon (custom) tags get a zero DataAtom.
1355		//
1356		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1357		if (p.oe[i].DataAtom == tagAtom) &&
1358			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1359			p.oe = p.oe[:i]
1360			break
1361		}
1362		if isSpecialElement(p.oe[i]) {
1363			break
1364		}
1365	}
1366}
1367
1368// Section 12.2.6.4.8.
1369func textIM(p *parser) bool {
1370	switch p.tok.Type {
1371	case ErrorToken:
1372		p.oe.pop()
1373	case TextToken:
1374		d := p.tok.Data
1375		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1376			// Ignore a newline at the start of a <textarea> block.
1377			if d != "" && d[0] == '\r' {
1378				d = d[1:]
1379			}
1380			if d != "" && d[0] == '\n' {
1381				d = d[1:]
1382			}
1383		}
1384		if d == "" {
1385			return true
1386		}
1387		p.addText(d)
1388		return true
1389	case EndTagToken:
1390		p.oe.pop()
1391	}
1392	p.im = p.originalIM
1393	p.originalIM = nil
1394	return p.tok.Type == EndTagToken
1395}
1396
1397// Section 12.2.6.4.9.
1398func inTableIM(p *parser) bool {
1399	switch p.tok.Type {
1400	case TextToken:
1401		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1402		switch p.oe.top().DataAtom {
1403		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1404			if strings.Trim(p.tok.Data, whitespace) == "" {
1405				p.addText(p.tok.Data)
1406				return true
1407			}
1408		}
1409	case StartTagToken:
1410		switch p.tok.DataAtom {
1411		case a.Caption:
1412			p.clearStackToContext(tableScope)
1413			p.afe = append(p.afe, &scopeMarker)
1414			p.addElement()
1415			p.im = inCaptionIM
1416			return true
1417		case a.Colgroup:
1418			p.clearStackToContext(tableScope)
1419			p.addElement()
1420			p.im = inColumnGroupIM
1421			return true
1422		case a.Col:
1423			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1424			return false
1425		case a.Tbody, a.Tfoot, a.Thead:
1426			p.clearStackToContext(tableScope)
1427			p.addElement()
1428			p.im = inTableBodyIM
1429			return true
1430		case a.Td, a.Th, a.Tr:
1431			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1432			return false
1433		case a.Table:
1434			if p.popUntil(tableScope, a.Table) {
1435				p.resetInsertionMode()
1436				return false
1437			}
1438			// Ignore the token.
1439			return true
1440		case a.Style, a.Script, a.Template:
1441			return inHeadIM(p)
1442		case a.Input:
1443			for _, t := range p.tok.Attr {
1444				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1445					p.addElement()
1446					p.oe.pop()
1447					return true
1448				}
1449			}
1450			// Otherwise drop down to the default action.
1451		case a.Form:
1452			if p.oe.contains(a.Template) || p.form != nil {
1453				// Ignore the token.
1454				return true
1455			}
1456			p.addElement()
1457			p.form = p.oe.pop()
1458		case a.Select:
1459			p.reconstructActiveFormattingElements()
1460			switch p.top().DataAtom {
1461			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1462				p.fosterParenting = true
1463			}
1464			p.addElement()
1465			p.fosterParenting = false
1466			p.framesetOK = false
1467			p.im = inSelectInTableIM
1468			return true
1469		}
1470	case EndTagToken:
1471		switch p.tok.DataAtom {
1472		case a.Table:
1473			if p.popUntil(tableScope, a.Table) {
1474				p.resetInsertionMode()
1475				return true
1476			}
1477			// Ignore the token.
1478			return true
1479		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1480			// Ignore the token.
1481			return true
1482		case a.Template:
1483			return inHeadIM(p)
1484		}
1485	case CommentToken:
1486		p.addChild(&Node{
1487			Type: CommentNode,
1488			Data: p.tok.Data,
1489		})
1490		return true
1491	case DoctypeToken:
1492		// Ignore the token.
1493		return true
1494	case ErrorToken:
1495		return inBodyIM(p)
1496	}
1497
1498	p.fosterParenting = true
1499	defer func() { p.fosterParenting = false }()
1500
1501	return inBodyIM(p)
1502}
1503
1504// Section 12.2.6.4.11.
1505func inCaptionIM(p *parser) bool {
1506	switch p.tok.Type {
1507	case StartTagToken:
1508		switch p.tok.DataAtom {
1509		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1510			if !p.popUntil(tableScope, a.Caption) {
1511				// Ignore the token.
1512				return true
1513			}
1514			p.clearActiveFormattingElements()
1515			p.im = inTableIM
1516			return false
1517		case a.Select:
1518			p.reconstructActiveFormattingElements()
1519			p.addElement()
1520			p.framesetOK = false
1521			p.im = inSelectInTableIM
1522			return true
1523		}
1524	case EndTagToken:
1525		switch p.tok.DataAtom {
1526		case a.Caption:
1527			if p.popUntil(tableScope, a.Caption) {
1528				p.clearActiveFormattingElements()
1529				p.im = inTableIM
1530			}
1531			return true
1532		case a.Table:
1533			if !p.popUntil(tableScope, a.Caption) {
1534				// Ignore the token.
1535				return true
1536			}
1537			p.clearActiveFormattingElements()
1538			p.im = inTableIM
1539			return false
1540		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1541			// Ignore the token.
1542			return true
1543		}
1544	}
1545	return inBodyIM(p)
1546}
1547
1548// Section 12.2.6.4.12.
1549func inColumnGroupIM(p *parser) bool {
1550	switch p.tok.Type {
1551	case TextToken:
1552		s := strings.TrimLeft(p.tok.Data, whitespace)
1553		if len(s) < len(p.tok.Data) {
1554			// Add the initial whitespace to the current node.
1555			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1556			if s == "" {
1557				return true
1558			}
1559			p.tok.Data = s
1560		}
1561	case CommentToken:
1562		p.addChild(&Node{
1563			Type: CommentNode,
1564			Data: p.tok.Data,
1565		})
1566		return true
1567	case DoctypeToken:
1568		// Ignore the token.
1569		return true
1570	case StartTagToken:
1571		switch p.tok.DataAtom {
1572		case a.Html:
1573			return inBodyIM(p)
1574		case a.Col:
1575			p.addElement()
1576			p.oe.pop()
1577			p.acknowledgeSelfClosingTag()
1578			return true
1579		case a.Template:
1580			return inHeadIM(p)
1581		}
1582	case EndTagToken:
1583		switch p.tok.DataAtom {
1584		case a.Colgroup:
1585			if p.oe.top().DataAtom == a.Colgroup {
1586				p.oe.pop()
1587				p.im = inTableIM
1588			}
1589			return true
1590		case a.Col:
1591			// Ignore the token.
1592			return true
1593		case a.Template:
1594			return inHeadIM(p)
1595		}
1596	case ErrorToken:
1597		return inBodyIM(p)
1598	}
1599	if p.oe.top().DataAtom != a.Colgroup {
1600		return true
1601	}
1602	p.oe.pop()
1603	p.im = inTableIM
1604	return false
1605}
1606
1607// Section 12.2.6.4.13.
1608func inTableBodyIM(p *parser) bool {
1609	switch p.tok.Type {
1610	case StartTagToken:
1611		switch p.tok.DataAtom {
1612		case a.Tr:
1613			p.clearStackToContext(tableBodyScope)
1614			p.addElement()
1615			p.im = inRowIM
1616			return true
1617		case a.Td, a.Th:
1618			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1619			return false
1620		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1621			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1622				p.im = inTableIM
1623				return false
1624			}
1625			// Ignore the token.
1626			return true
1627		}
1628	case EndTagToken:
1629		switch p.tok.DataAtom {
1630		case a.Tbody, a.Tfoot, a.Thead:
1631			if p.elementInScope(tableScope, p.tok.DataAtom) {
1632				p.clearStackToContext(tableBodyScope)
1633				p.oe.pop()
1634				p.im = inTableIM
1635			}
1636			return true
1637		case a.Table:
1638			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1639				p.im = inTableIM
1640				return false
1641			}
1642			// Ignore the token.
1643			return true
1644		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1645			// Ignore the token.
1646			return true
1647		}
1648	case CommentToken:
1649		p.addChild(&Node{
1650			Type: CommentNode,
1651			Data: p.tok.Data,
1652		})
1653		return true
1654	}
1655
1656	return inTableIM(p)
1657}
1658
1659// Section 12.2.6.4.14.
1660func inRowIM(p *parser) bool {
1661	switch p.tok.Type {
1662	case StartTagToken:
1663		switch p.tok.DataAtom {
1664		case a.Td, a.Th:
1665			p.clearStackToContext(tableRowScope)
1666			p.addElement()
1667			p.afe = append(p.afe, &scopeMarker)
1668			p.im = inCellIM
1669			return true
1670		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1671			if p.popUntil(tableScope, a.Tr) {
1672				p.im = inTableBodyIM
1673				return false
1674			}
1675			// Ignore the token.
1676			return true
1677		}
1678	case EndTagToken:
1679		switch p.tok.DataAtom {
1680		case a.Tr:
1681			if p.popUntil(tableScope, a.Tr) {
1682				p.im = inTableBodyIM
1683				return true
1684			}
1685			// Ignore the token.
1686			return true
1687		case a.Table:
1688			if p.popUntil(tableScope, a.Tr) {
1689				p.im = inTableBodyIM
1690				return false
1691			}
1692			// Ignore the token.
1693			return true
1694		case a.Tbody, a.Tfoot, a.Thead:
1695			if p.elementInScope(tableScope, p.tok.DataAtom) {
1696				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1697				return false
1698			}
1699			// Ignore the token.
1700			return true
1701		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1702			// Ignore the token.
1703			return true
1704		}
1705	}
1706
1707	return inTableIM(p)
1708}
1709
1710// Section 12.2.6.4.15.
1711func inCellIM(p *parser) bool {
1712	switch p.tok.Type {
1713	case StartTagToken:
1714		switch p.tok.DataAtom {
1715		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1716			if p.popUntil(tableScope, a.Td, a.Th) {
1717				// Close the cell and reprocess.
1718				p.clearActiveFormattingElements()
1719				p.im = inRowIM
1720				return false
1721			}
1722			// Ignore the token.
1723			return true
1724		case a.Select:
1725			p.reconstructActiveFormattingElements()
1726			p.addElement()
1727			p.framesetOK = false
1728			p.im = inSelectInTableIM
1729			return true
1730		}
1731	case EndTagToken:
1732		switch p.tok.DataAtom {
1733		case a.Td, a.Th:
1734			if !p.popUntil(tableScope, p.tok.DataAtom) {
1735				// Ignore the token.
1736				return true
1737			}
1738			p.clearActiveFormattingElements()
1739			p.im = inRowIM
1740			return true
1741		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1742			// Ignore the token.
1743			return true
1744		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1745			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1746				// Ignore the token.
1747				return true
1748			}
1749			// Close the cell and reprocess.
1750			if p.popUntil(tableScope, a.Td, a.Th) {
1751				p.clearActiveFormattingElements()
1752			}
1753			p.im = inRowIM
1754			return false
1755		}
1756	}
1757	return inBodyIM(p)
1758}
1759
1760// Section 12.2.6.4.16.
1761func inSelectIM(p *parser) bool {
1762	switch p.tok.Type {
1763	case TextToken:
1764		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1765	case StartTagToken:
1766		switch p.tok.DataAtom {
1767		case a.Html:
1768			return inBodyIM(p)
1769		case a.Option:
1770			if p.top().DataAtom == a.Option {
1771				p.oe.pop()
1772			}
1773			p.addElement()
1774		case a.Optgroup:
1775			if p.top().DataAtom == a.Option {
1776				p.oe.pop()
1777			}
1778			if p.top().DataAtom == a.Optgroup {
1779				p.oe.pop()
1780			}
1781			p.addElement()
1782		case a.Select:
1783			if !p.popUntil(selectScope, a.Select) {
1784				// Ignore the token.
1785				return true
1786			}
1787			p.resetInsertionMode()
1788		case a.Input, a.Keygen, a.Textarea:
1789			if p.elementInScope(selectScope, a.Select) {
1790				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1791				return false
1792			}
1793			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1794			p.tokenizer.NextIsNotRawText()
1795			// Ignore the token.
1796			return true
1797		case a.Script, a.Template:
1798			return inHeadIM(p)
1799		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1800			// Don't let the tokenizer go into raw text mode when there are raw tags
1801			// to be ignored. These tags should be ignored from the tokenizer
1802			// properly.
1803			p.tokenizer.NextIsNotRawText()
1804			// Ignore the token.
1805			return true
1806		}
1807	case EndTagToken:
1808		switch p.tok.DataAtom {
1809		case a.Option:
1810			if p.top().DataAtom == a.Option {
1811				p.oe.pop()
1812			}
1813		case a.Optgroup:
1814			i := len(p.oe) - 1
1815			if p.oe[i].DataAtom == a.Option {
1816				i--
1817			}
1818			if p.oe[i].DataAtom == a.Optgroup {
1819				p.oe = p.oe[:i]
1820			}
1821		case a.Select:
1822			if !p.popUntil(selectScope, a.Select) {
1823				// Ignore the token.
1824				return true
1825			}
1826			p.resetInsertionMode()
1827		case a.Template:
1828			return inHeadIM(p)
1829		}
1830	case CommentToken:
1831		p.addChild(&Node{
1832			Type: CommentNode,
1833			Data: p.tok.Data,
1834		})
1835	case DoctypeToken:
1836		// Ignore the token.
1837		return true
1838	case ErrorToken:
1839		return inBodyIM(p)
1840	}
1841
1842	return true
1843}
1844
1845// Section 12.2.6.4.17.
1846func inSelectInTableIM(p *parser) bool {
1847	switch p.tok.Type {
1848	case StartTagToken, EndTagToken:
1849		switch p.tok.DataAtom {
1850		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1851			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1852				// Ignore the token.
1853				return true
1854			}
1855			// This is like p.popUntil(selectScope, a.Select), but it also
1856			// matches <math select>, not just <select>. Matching the MathML
1857			// tag is arguably incorrect (conceptually), but it mimics what
1858			// Chromium does.
1859			for i := len(p.oe) - 1; i >= 0; i-- {
1860				if n := p.oe[i]; n.DataAtom == a.Select {
1861					p.oe = p.oe[:i]
1862					break
1863				}
1864			}
1865			p.resetInsertionMode()
1866			return false
1867		}
1868	}
1869	return inSelectIM(p)
1870}
1871
1872// Section 12.2.6.4.18.
1873func inTemplateIM(p *parser) bool {
1874	switch p.tok.Type {
1875	case TextToken, CommentToken, DoctypeToken:
1876		return inBodyIM(p)
1877	case StartTagToken:
1878		switch p.tok.DataAtom {
1879		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1880			return inHeadIM(p)
1881		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1882			p.templateStack.pop()
1883			p.templateStack = append(p.templateStack, inTableIM)
1884			p.im = inTableIM
1885			return false
1886		case a.Col:
1887			p.templateStack.pop()
1888			p.templateStack = append(p.templateStack, inColumnGroupIM)
1889			p.im = inColumnGroupIM
1890			return false
1891		case a.Tr:
1892			p.templateStack.pop()
1893			p.templateStack = append(p.templateStack, inTableBodyIM)
1894			p.im = inTableBodyIM
1895			return false
1896		case a.Td, a.Th:
1897			p.templateStack.pop()
1898			p.templateStack = append(p.templateStack, inRowIM)
1899			p.im = inRowIM
1900			return false
1901		default:
1902			p.templateStack.pop()
1903			p.templateStack = append(p.templateStack, inBodyIM)
1904			p.im = inBodyIM
1905			return false
1906		}
1907	case EndTagToken:
1908		switch p.tok.DataAtom {
1909		case a.Template:
1910			return inHeadIM(p)
1911		default:
1912			// Ignore the token.
1913			return true
1914		}
1915	case ErrorToken:
1916		if !p.oe.contains(a.Template) {
1917			// Ignore the token.
1918			return true
1919		}
1920		// TODO: remove this divergence from the HTML5 spec.
1921		//
1922		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1923		p.generateImpliedEndTags()
1924		for i := len(p.oe) - 1; i >= 0; i-- {
1925			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1926				p.oe = p.oe[:i]
1927				break
1928			}
1929		}
1930		p.clearActiveFormattingElements()
1931		p.templateStack.pop()
1932		p.resetInsertionMode()
1933		return false
1934	}
1935	return false
1936}
1937
1938// Section 12.2.6.4.19.
1939func afterBodyIM(p *parser) bool {
1940	switch p.tok.Type {
1941	case ErrorToken:
1942		// Stop parsing.
1943		return true
1944	case TextToken:
1945		s := strings.TrimLeft(p.tok.Data, whitespace)
1946		if len(s) == 0 {
1947			// It was all whitespace.
1948			return inBodyIM(p)
1949		}
1950	case StartTagToken:
1951		if p.tok.DataAtom == a.Html {
1952			return inBodyIM(p)
1953		}
1954	case EndTagToken:
1955		if p.tok.DataAtom == a.Html {
1956			if !p.fragment {
1957				p.im = afterAfterBodyIM
1958			}
1959			return true
1960		}
1961	case CommentToken:
1962		// The comment is attached to the <html> element.
1963		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1964			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1965		}
1966		p.oe[0].AppendChild(&Node{
1967			Type: CommentNode,
1968			Data: p.tok.Data,
1969		})
1970		return true
1971	}
1972	p.im = inBodyIM
1973	return false
1974}
1975
1976// Section 12.2.6.4.20.
1977func inFramesetIM(p *parser) bool {
1978	switch p.tok.Type {
1979	case CommentToken:
1980		p.addChild(&Node{
1981			Type: CommentNode,
1982			Data: p.tok.Data,
1983		})
1984	case TextToken:
1985		// Ignore all text but whitespace.
1986		s := strings.Map(func(c rune) rune {
1987			switch c {
1988			case ' ', '\t', '\n', '\f', '\r':
1989				return c
1990			}
1991			return -1
1992		}, p.tok.Data)
1993		if s != "" {
1994			p.addText(s)
1995		}
1996	case StartTagToken:
1997		switch p.tok.DataAtom {
1998		case a.Html:
1999			return inBodyIM(p)
2000		case a.Frameset:
2001			p.addElement()
2002		case a.Frame:
2003			p.addElement()
2004			p.oe.pop()
2005			p.acknowledgeSelfClosingTag()
2006		case a.Noframes:
2007			return inHeadIM(p)
2008		}
2009	case EndTagToken:
2010		switch p.tok.DataAtom {
2011		case a.Frameset:
2012			if p.oe.top().DataAtom != a.Html {
2013				p.oe.pop()
2014				if p.oe.top().DataAtom != a.Frameset {
2015					p.im = afterFramesetIM
2016					return true
2017				}
2018			}
2019		}
2020	default:
2021		// Ignore the token.
2022	}
2023	return true
2024}
2025
2026// Section 12.2.6.4.21.
2027func afterFramesetIM(p *parser) bool {
2028	switch p.tok.Type {
2029	case CommentToken:
2030		p.addChild(&Node{
2031			Type: CommentNode,
2032			Data: p.tok.Data,
2033		})
2034	case TextToken:
2035		// Ignore all text but whitespace.
2036		s := strings.Map(func(c rune) rune {
2037			switch c {
2038			case ' ', '\t', '\n', '\f', '\r':
2039				return c
2040			}
2041			return -1
2042		}, p.tok.Data)
2043		if s != "" {
2044			p.addText(s)
2045		}
2046	case StartTagToken:
2047		switch p.tok.DataAtom {
2048		case a.Html:
2049			return inBodyIM(p)
2050		case a.Noframes:
2051			return inHeadIM(p)
2052		}
2053	case EndTagToken:
2054		switch p.tok.DataAtom {
2055		case a.Html:
2056			p.im = afterAfterFramesetIM
2057			return true
2058		}
2059	default:
2060		// Ignore the token.
2061	}
2062	return true
2063}
2064
2065// Section 12.2.6.4.22.
2066func afterAfterBodyIM(p *parser) bool {
2067	switch p.tok.Type {
2068	case ErrorToken:
2069		// Stop parsing.
2070		return true
2071	case TextToken:
2072		s := strings.TrimLeft(p.tok.Data, whitespace)
2073		if len(s) == 0 {
2074			// It was all whitespace.
2075			return inBodyIM(p)
2076		}
2077	case StartTagToken:
2078		if p.tok.DataAtom == a.Html {
2079			return inBodyIM(p)
2080		}
2081	case CommentToken:
2082		p.doc.AppendChild(&Node{
2083			Type: CommentNode,
2084			Data: p.tok.Data,
2085		})
2086		return true
2087	case DoctypeToken:
2088		return inBodyIM(p)
2089	}
2090	p.im = inBodyIM
2091	return false
2092}
2093
2094// Section 12.2.6.4.23.
2095func afterAfterFramesetIM(p *parser) bool {
2096	switch p.tok.Type {
2097	case CommentToken:
2098		p.doc.AppendChild(&Node{
2099			Type: CommentNode,
2100			Data: p.tok.Data,
2101		})
2102	case TextToken:
2103		// Ignore all text but whitespace.
2104		s := strings.Map(func(c rune) rune {
2105			switch c {
2106			case ' ', '\t', '\n', '\f', '\r':
2107				return c
2108			}
2109			return -1
2110		}, p.tok.Data)
2111		if s != "" {
2112			p.tok.Data = s
2113			return inBodyIM(p)
2114		}
2115	case StartTagToken:
2116		switch p.tok.DataAtom {
2117		case a.Html:
2118			return inBodyIM(p)
2119		case a.Noframes:
2120			return inHeadIM(p)
2121		}
2122	case DoctypeToken:
2123		return inBodyIM(p)
2124	default:
2125		// Ignore the token.
2126	}
2127	return true
2128}
2129
2130const whitespaceOrNUL = whitespace + "\x00"
2131
2132// Section 12.2.6.5
2133func parseForeignContent(p *parser) bool {
2134	switch p.tok.Type {
2135	case TextToken:
2136		if p.framesetOK {
2137			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2138		}
2139		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2140		p.addText(p.tok.Data)
2141	case CommentToken:
2142		p.addChild(&Node{
2143			Type: CommentNode,
2144			Data: p.tok.Data,
2145		})
2146	case StartTagToken:
2147		if !p.fragment {
2148			b := breakout[p.tok.Data]
2149			if p.tok.DataAtom == a.Font {
2150			loop:
2151				for _, attr := range p.tok.Attr {
2152					switch attr.Key {
2153					case "color", "face", "size":
2154						b = true
2155						break loop
2156					}
2157				}
2158			}
2159			if b {
2160				for i := len(p.oe) - 1; i >= 0; i-- {
2161					n := p.oe[i]
2162					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2163						p.oe = p.oe[:i+1]
2164						break
2165					}
2166				}
2167				return false
2168			}
2169		}
2170		current := p.adjustedCurrentNode()
2171		switch current.Namespace {
2172		case "math":
2173			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2174		case "svg":
2175			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
2176			// SVG wants e.g. "foreignObject" with a capital second "O".
2177			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2178				p.tok.DataAtom = a.Lookup([]byte(x))
2179				p.tok.Data = x
2180			}
2181			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2182		default:
2183			panic("html: bad parser state: unexpected namespace")
2184		}
2185		adjustForeignAttributes(p.tok.Attr)
2186		namespace := current.Namespace
2187		p.addElement()
2188		p.top().Namespace = namespace
2189		if namespace != "" {
2190			// Don't let the tokenizer go into raw text mode in foreign content
2191			// (e.g. in an SVG <title> tag).
2192			p.tokenizer.NextIsNotRawText()
2193		}
2194		if p.hasSelfClosingToken {
2195			p.oe.pop()
2196			p.acknowledgeSelfClosingTag()
2197		}
2198	case EndTagToken:
2199		for i := len(p.oe) - 1; i >= 0; i-- {
2200			if p.oe[i].Namespace == "" {
2201				return p.im(p)
2202			}
2203			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2204				p.oe = p.oe[:i]
2205				break
2206			}
2207		}
2208		return true
2209	default:
2210		// Ignore the token.
2211	}
2212	return true
2213}
2214
2215// Section 12.2.4.2.
2216func (p *parser) adjustedCurrentNode() *Node {
2217	if len(p.oe) == 1 && p.fragment && p.context != nil {
2218		return p.context
2219	}
2220	return p.oe.top()
2221}
2222
2223// Section 12.2.6.
2224func (p *parser) inForeignContent() bool {
2225	if len(p.oe) == 0 {
2226		return false
2227	}
2228	n := p.adjustedCurrentNode()
2229	if n.Namespace == "" {
2230		return false
2231	}
2232	if mathMLTextIntegrationPoint(n) {
2233		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2234			return false
2235		}
2236		if p.tok.Type == TextToken {
2237			return false
2238		}
2239	}
2240	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2241		return false
2242	}
2243	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2244		return false
2245	}
2246	if p.tok.Type == ErrorToken {
2247		return false
2248	}
2249	return true
2250}
2251
2252// parseImpliedToken parses a token as though it had appeared in the parser's
2253// input.
2254func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2255	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2256	p.tok = Token{
2257		Type:     t,
2258		DataAtom: dataAtom,
2259		Data:     data,
2260	}
2261	p.hasSelfClosingToken = false
2262	p.parseCurrentToken()
2263	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2264}
2265
2266// parseCurrentToken runs the current token through the parsing routines
2267// until it is consumed.
2268func (p *parser) parseCurrentToken() {
2269	if p.tok.Type == SelfClosingTagToken {
2270		p.hasSelfClosingToken = true
2271		p.tok.Type = StartTagToken
2272	}
2273
2274	consumed := false
2275	for !consumed {
2276		if p.inForeignContent() {
2277			consumed = parseForeignContent(p)
2278		} else {
2279			consumed = p.im(p)
2280		}
2281	}
2282
2283	if p.hasSelfClosingToken {
2284		// This is a parse error, but ignore it.
2285		p.hasSelfClosingToken = false
2286	}
2287}
2288
2289func (p *parser) parse() error {
2290	// Iterate until EOF. Any other error will cause an early return.
2291	var err error
2292	for err != io.EOF {
2293		// CDATA sections are allowed only in foreign content.
2294		n := p.oe.top()
2295		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2296		// Read and parse the next token.
2297		p.tokenizer.Next()
2298		p.tok = p.tokenizer.Token()
2299		if p.tok.Type == ErrorToken {
2300			err = p.tokenizer.Err()
2301			if err != nil && err != io.EOF {
2302				return err
2303			}
2304		}
2305		p.parseCurrentToken()
2306	}
2307	return nil
2308}
2309
// Parse returns the parse tree for the HTML from the given Reader.
//
// It implements the HTML5 parsing algorithm
// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
// which is very complicated. The resultant tree can contain implicitly created
// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
// differ from the nesting implied by a naive processing of start and end
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
// with no corresponding node in the resulting tree.
//
// The input is assumed to be UTF-8 encoded.
//
// Parse is equivalent to calling ParseWithOptions with no options.
func Parse(r io.Reader) (*Node, error) {
	return ParseWithOptions(r)
}
2324
2325// ParseFragment parses a fragment of HTML and returns the nodes that were
2326// found. If the fragment is the InnerHTML for an existing element, pass that
2327// element in context.
2328//
2329// It has the same intricacies as Parse.
2330func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2331	return ParseFragmentWithOptions(r, context)
2332}
2333
2334// ParseOption configures a parser.
2335type ParseOption func(p *parser)
2336
2337// ParseOptionEnableScripting configures the scripting flag.
2338// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2339//
2340// By default, scripting is enabled.
2341func ParseOptionEnableScripting(enable bool) ParseOption {
2342	return func(p *parser) {
2343		p.scripting = enable
2344	}
2345}
2346
2347// ParseWithOptions is like Parse, with options.
2348func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2349	p := &parser{
2350		tokenizer: NewTokenizer(r),
2351		doc: &Node{
2352			Type: DocumentNode,
2353		},
2354		scripting:  true,
2355		framesetOK: true,
2356		im:         initialIM,
2357	}
2358
2359	for _, f := range opts {
2360		f(p)
2361	}
2362
2363	if err := p.parse(); err != nil {
2364		return nil, err
2365	}
2366	return p.doc, nil
2367}
2368
2369// ParseFragmentWithOptions is like ParseFragment, with options.
2370func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2371	contextTag := ""
2372	if context != nil {
2373		if context.Type != ElementNode {
2374			return nil, errors.New("html: ParseFragment of non-element Node")
2375		}
2376		// The next check isn't just context.DataAtom.String() == context.Data because
2377		// it is valid to pass an element whose tag isn't a known atom. For example,
2378		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2379		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2380			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2381		}
2382		contextTag = context.DataAtom.String()
2383	}
2384	p := &parser{
2385		doc: &Node{
2386			Type: DocumentNode,
2387		},
2388		scripting: true,
2389		fragment:  true,
2390		context:   context,
2391	}
2392	if context != nil && context.Namespace != "" {
2393		p.tokenizer = NewTokenizer(r)
2394	} else {
2395		p.tokenizer = NewTokenizerFragment(r, contextTag)
2396	}
2397
2398	for _, f := range opts {
2399		f(p)
2400	}
2401
2402	root := &Node{
2403		Type:     ElementNode,
2404		DataAtom: a.Html,
2405		Data:     a.Html.String(),
2406	}
2407	p.doc.AppendChild(root)
2408	p.oe = nodeStack{root}
2409	if context != nil && context.DataAtom == a.Template {
2410		p.templateStack = append(p.templateStack, inTemplateIM)
2411	}
2412	p.resetInsertionMode()
2413
2414	for n := context; n != nil; n = n.Parent {
2415		if n.Type == ElementNode && n.DataAtom == a.Form {
2416			p.form = n
2417			break
2418		}
2419	}
2420
2421	if err := p.parse(); err != nil {
2422		return nil, err
2423	}
2424
2425	parent := p.doc
2426	if context != nil {
2427		parent = root
2428	}
2429
2430	var result []*Node
2431	for c := parent.FirstChild; c != nil; {
2432		next := c.NextSibling
2433		parent.RemoveChild(c)
2434		result = append(result, c)
2435		c = next
2436	}
2437	return result, nil
2438}
2439