1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8	"errors"
9	"fmt"
10	"io"
11	"strings"
12
13	a "golang.org/x/net/html/atom"
14)
15
16// A parser implements the HTML5 parsing algorithm:
17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18type parser struct {
19	// tokenizer provides the tokens for the parser.
20	tokenizer *Tokenizer
21	// tok is the most recently read token.
22	tok Token
23	// Self-closing tags like <hr/> are treated as start tags, except that
24	// hasSelfClosingToken is set while they are being processed.
25	hasSelfClosingToken bool
26	// doc is the document root element.
27	doc *Node
28	// The stack of open elements (section 12.2.4.2) and active formatting
29	// elements (section 12.2.4.3).
30	oe, afe nodeStack
31	// Element pointers (section 12.2.4.4).
32	head, form *Node
33	// Other parsing state flags (section 12.2.4.5).
34	scripting, framesetOK bool
35	// The stack of template insertion modes
36	templateStack insertionModeStack
37	// im is the current insertion mode.
38	im insertionMode
39	// originalIM is the insertion mode to go back to after completing a text
40	// or inTableText insertion mode.
41	originalIM insertionMode
42	// fosterParenting is whether new elements should be inserted according to
43	// the foster parenting rules (section 12.2.6.1).
44	fosterParenting bool
45	// quirks is whether the parser is operating in "quirks mode."
46	quirks bool
47	// fragment is whether the parser is parsing an HTML fragment.
48	fragment bool
49	// context is the context element when parsing an HTML fragment
50	// (section 12.4).
51	context *Node
52}
53
54func (p *parser) top() *Node {
55	if n := p.oe.top(); n != nil {
56		return n
57	}
58	return p.doc
59}
60
61// Stop tags for use in popUntil. These come from section 12.2.4.2.
62var (
63	defaultScopeStopTags = map[string][]a.Atom{
64		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66		"svg":  {a.Desc, a.ForeignObject, a.Title},
67	}
68)
69
// scope selects which set of stop tags bounds a search of the stack of open
// elements (see indexOfElementInScope and clearStackToContext).
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope additionally stops at <ol> and <ul>.
	listItemScope
	// buttonScope additionally stops at <button>.
	buttonScope
	// tableScope stops at <html>, <table> and <template>.
	tableScope
	// tableRowScope is accepted by clearStackToContext only; it stops at
	// <html>, <tr> and <template>.
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext only; it stops at
	// <html>, <tbody>, <tfoot>, <thead> and <template>.
	tableBodyScope
	// selectScope stops at every element other than <optgroup> and <option>.
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101		p.oe = p.oe[:i]
102		return true
103	}
104	return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111	for i := len(p.oe) - 1; i >= 0; i-- {
112		tagAtom := p.oe[i].DataAtom
113		if p.oe[i].Namespace == "" {
114			for _, t := range matchTags {
115				if t == tagAtom {
116					return i
117				}
118			}
119			switch s {
120			case defaultScope:
121				// No-op.
122			case listItemScope:
123				if tagAtom == a.Ol || tagAtom == a.Ul {
124					return -1
125				}
126			case buttonScope:
127				if tagAtom == a.Button {
128					return -1
129				}
130			case tableScope:
131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132					return -1
133				}
134			case selectScope:
135				if tagAtom != a.Optgroup && tagAtom != a.Option {
136					return -1
137				}
138			default:
139				panic("unreachable")
140			}
141		}
142		switch s {
143		case defaultScope, listItemScope, buttonScope:
144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145				if t == tagAtom {
146					return -1
147				}
148			}
149		}
150	}
151	return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157	return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163	for i := len(p.oe) - 1; i >= 0; i-- {
164		tagAtom := p.oe[i].DataAtom
165		switch s {
166		case tableScope:
167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168				p.oe = p.oe[:i+1]
169				return
170			}
171		case tableRowScope:
172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173				p.oe = p.oe[:i+1]
174				return
175			}
176		case tableBodyScope:
177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178				p.oe = p.oe[:i+1]
179				return
180			}
181		default:
182			panic("unreachable")
183		}
184	}
185}
186
// parseGenericRawTextElement implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
//
// It inserts an element for the current token, records the current insertion
// mode in originalIM and switches the parser to textIM.
//
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
// officially, need to make tokenizer consider both states.
func (p *parser) parseGenericRawTextElement() {
	p.addElement()
	// Unlike setOriginalIM, this overwrites originalIM unconditionally.
	p.originalIM = p.im
	p.im = textIM
}
197
198// generateImpliedEndTags pops nodes off the stack of open elements as long as
199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
200// If exceptions are specified, nodes with that name will not be popped off.
201func (p *parser) generateImpliedEndTags(exceptions ...string) {
202	var i int
203loop:
204	for i = len(p.oe) - 1; i >= 0; i-- {
205		n := p.oe[i]
206		if n.Type != ElementNode {
207			break
208		}
209		switch n.DataAtom {
210		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211			for _, except := range exceptions {
212				if n.Data == except {
213					break loop
214				}
215			}
216			continue
217		}
218		break
219	}
220
221	p.oe = p.oe[:i+1]
222}
223
224// addChild adds a child node n to the top element, and pushes n onto the stack
225// of open elements if it is an element node.
226func (p *parser) addChild(n *Node) {
227	if p.shouldFosterParent() {
228		p.fosterParent(n)
229	} else {
230		p.top().AppendChild(n)
231	}
232
233	if n.Type == ElementNode {
234		p.oe = append(p.oe, n)
235	}
236}
237
238// shouldFosterParent returns whether the next node to be added should be
239// foster parented.
240func (p *parser) shouldFosterParent() bool {
241	if p.fosterParenting {
242		switch p.top().DataAtom {
243		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244			return true
245		}
246	}
247	return false
248}
249
250// fosterParent adds a child node according to the foster parenting rules.
251// Section 12.2.6.1, "foster parenting".
252func (p *parser) fosterParent(n *Node) {
253	var table, parent, prev, template *Node
254	var i int
255	for i = len(p.oe) - 1; i >= 0; i-- {
256		if p.oe[i].DataAtom == a.Table {
257			table = p.oe[i]
258			break
259		}
260	}
261
262	var j int
263	for j = len(p.oe) - 1; j >= 0; j-- {
264		if p.oe[j].DataAtom == a.Template {
265			template = p.oe[j]
266			break
267		}
268	}
269
270	if template != nil && (table == nil || j > i) {
271		template.AppendChild(n)
272		return
273	}
274
275	if table == nil {
276		// The foster parent is the html element.
277		parent = p.oe[0]
278	} else {
279		parent = table.Parent
280	}
281	if parent == nil {
282		parent = p.oe[i-1]
283	}
284
285	if table != nil {
286		prev = table.PrevSibling
287	} else {
288		prev = parent.LastChild
289	}
290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291		prev.Data += n.Data
292		return
293	}
294
295	parent.InsertBefore(n, table)
296}
297
298// addText adds text to the preceding node if it is a text node, or else it
299// calls addChild with a new text node.
300func (p *parser) addText(text string) {
301	if text == "" {
302		return
303	}
304
305	if p.shouldFosterParent() {
306		p.fosterParent(&Node{
307			Type: TextNode,
308			Data: text,
309		})
310		return
311	}
312
313	t := p.top()
314	if n := t.LastChild; n != nil && n.Type == TextNode {
315		n.Data += text
316		return
317	}
318	p.addChild(&Node{
319		Type: TextNode,
320		Data: text,
321	})
322}
323
324// addElement adds a child element based on the current token.
325func (p *parser) addElement() {
326	p.addChild(&Node{
327		Type:     ElementNode,
328		DataAtom: p.tok.DataAtom,
329		Data:     p.tok.Data,
330		Attr:     p.tok.Attr,
331	})
332}
333
334// Section 12.2.4.3.
335func (p *parser) addFormattingElement() {
336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337	p.addElement()
338
339	// Implement the Noah's Ark clause, but with three per family instead of two.
340	identicalElements := 0
341findIdenticalElements:
342	for i := len(p.afe) - 1; i >= 0; i-- {
343		n := p.afe[i]
344		if n.Type == scopeMarkerNode {
345			break
346		}
347		if n.Type != ElementNode {
348			continue
349		}
350		if n.Namespace != "" {
351			continue
352		}
353		if n.DataAtom != tagAtom {
354			continue
355		}
356		if len(n.Attr) != len(attr) {
357			continue
358		}
359	compareAttributes:
360		for _, t0 := range n.Attr {
361			for _, t1 := range attr {
362				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363					// Found a match for this attribute, continue with the next attribute.
364					continue compareAttributes
365				}
366			}
367			// If we get here, there is no attribute that matches a.
368			// Therefore the element is not identical to the new one.
369			continue findIdenticalElements
370		}
371
372		identicalElements++
373		if identicalElements >= 3 {
374			p.afe.remove(n)
375		}
376	}
377
378	p.afe = append(p.afe, p.top())
379}
380
381// Section 12.2.4.3.
382func (p *parser) clearActiveFormattingElements() {
383	for {
384		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
385			return
386		}
387	}
388}
389
390// Section 12.2.4.3.
391func (p *parser) reconstructActiveFormattingElements() {
392	n := p.afe.top()
393	if n == nil {
394		return
395	}
396	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
397		return
398	}
399	i := len(p.afe) - 1
400	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
401		if i == 0 {
402			i = -1
403			break
404		}
405		i--
406		n = p.afe[i]
407	}
408	for {
409		i++
410		clone := p.afe[i].clone()
411		p.addChild(clone)
412		p.afe[i] = clone
413		if i == len(p.afe)-1 {
414			break
415		}
416	}
417}
418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the current token has been dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
423
// An insertionMode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
type insertionMode func(*parser) bool
429
430// setOriginalIM sets the insertion mode to return to after completing a text or
431// inTableText insertion mode.
432// Section 12.2.4.1, "using the rules for".
433func (p *parser) setOriginalIM() {
434	if p.originalIM != nil {
435		panic("html: bad parser state: originalIM was set twice")
436	}
437	p.originalIM = p.im
438}
439
440// Section 12.2.4.1, "reset the insertion mode".
441func (p *parser) resetInsertionMode() {
442	for i := len(p.oe) - 1; i >= 0; i-- {
443		n := p.oe[i]
444		last := i == 0
445		if last && p.context != nil {
446			n = p.context
447		}
448
449		switch n.DataAtom {
450		case a.Select:
451			if !last {
452				for ancestor, first := n, p.oe[0]; ancestor != first; {
453					ancestor = p.oe[p.oe.index(ancestor)-1]
454					switch ancestor.DataAtom {
455					case a.Template:
456						p.im = inSelectIM
457						return
458					case a.Table:
459						p.im = inSelectInTableIM
460						return
461					}
462				}
463			}
464			p.im = inSelectIM
465		case a.Td, a.Th:
466			// TODO: remove this divergence from the HTML5 spec.
467			//
468			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
469			p.im = inCellIM
470		case a.Tr:
471			p.im = inRowIM
472		case a.Tbody, a.Thead, a.Tfoot:
473			p.im = inTableBodyIM
474		case a.Caption:
475			p.im = inCaptionIM
476		case a.Colgroup:
477			p.im = inColumnGroupIM
478		case a.Table:
479			p.im = inTableIM
480		case a.Template:
481			// TODO: remove this divergence from the HTML5 spec.
482			if n.Namespace != "" {
483				continue
484			}
485			p.im = p.templateStack.top()
486		case a.Head:
487			// TODO: remove this divergence from the HTML5 spec.
488			//
489			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
490			p.im = inHeadIM
491		case a.Body:
492			p.im = inBodyIM
493		case a.Frameset:
494			p.im = inFramesetIM
495		case a.Html:
496			if p.head == nil {
497				p.im = beforeHeadIM
498			} else {
499				p.im = afterHeadIM
500			}
501		default:
502			if last {
503				p.im = inBodyIM
504				return
505			}
506			continue
507		}
508		return
509	}
510}
511
// whitespace is the cutset passed to strings.TrimLeft by the insertion modes
// to strip or detect leading whitespace in text tokens: space, tab, carriage
// return, line feed and form feed.
const whitespace = " \t\r\n\f"
513
514// Section 12.2.6.4.1.
515func initialIM(p *parser) bool {
516	switch p.tok.Type {
517	case TextToken:
518		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519		if len(p.tok.Data) == 0 {
520			// It was all whitespace, so ignore it.
521			return true
522		}
523	case CommentToken:
524		p.doc.AppendChild(&Node{
525			Type: CommentNode,
526			Data: p.tok.Data,
527		})
528		return true
529	case DoctypeToken:
530		n, quirks := parseDoctype(p.tok.Data)
531		p.doc.AppendChild(n)
532		p.quirks = quirks
533		p.im = beforeHTMLIM
534		return true
535	}
536	p.quirks = true
537	p.im = beforeHTMLIM
538	return false
539}
540
541// Section 12.2.6.4.2.
542func beforeHTMLIM(p *parser) bool {
543	switch p.tok.Type {
544	case DoctypeToken:
545		// Ignore the token.
546		return true
547	case TextToken:
548		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549		if len(p.tok.Data) == 0 {
550			// It was all whitespace, so ignore it.
551			return true
552		}
553	case StartTagToken:
554		if p.tok.DataAtom == a.Html {
555			p.addElement()
556			p.im = beforeHeadIM
557			return true
558		}
559	case EndTagToken:
560		switch p.tok.DataAtom {
561		case a.Head, a.Body, a.Html, a.Br:
562			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563			return false
564		default:
565			// Ignore the token.
566			return true
567		}
568	case CommentToken:
569		p.doc.AppendChild(&Node{
570			Type: CommentNode,
571			Data: p.tok.Data,
572		})
573		return true
574	}
575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576	return false
577}
578
579// Section 12.2.6.4.3.
580func beforeHeadIM(p *parser) bool {
581	switch p.tok.Type {
582	case TextToken:
583		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584		if len(p.tok.Data) == 0 {
585			// It was all whitespace, so ignore it.
586			return true
587		}
588	case StartTagToken:
589		switch p.tok.DataAtom {
590		case a.Head:
591			p.addElement()
592			p.head = p.top()
593			p.im = inHeadIM
594			return true
595		case a.Html:
596			return inBodyIM(p)
597		}
598	case EndTagToken:
599		switch p.tok.DataAtom {
600		case a.Head, a.Body, a.Html, a.Br:
601			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602			return false
603		default:
604			// Ignore the token.
605			return true
606		}
607	case CommentToken:
608		p.addChild(&Node{
609			Type: CommentNode,
610			Data: p.tok.Data,
611		})
612		return true
613	case DoctypeToken:
614		// Ignore the token.
615		return true
616	}
617
618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619	return false
620}
621
622// Section 12.2.6.4.4.
623func inHeadIM(p *parser) bool {
624	switch p.tok.Type {
625	case TextToken:
626		s := strings.TrimLeft(p.tok.Data, whitespace)
627		if len(s) < len(p.tok.Data) {
628			// Add the initial whitespace to the current node.
629			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
630			if s == "" {
631				return true
632			}
633			p.tok.Data = s
634		}
635	case StartTagToken:
636		switch p.tok.DataAtom {
637		case a.Html:
638			return inBodyIM(p)
639		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
640			p.addElement()
641			p.oe.pop()
642			p.acknowledgeSelfClosingTag()
643			return true
644		case a.Noscript:
645			if p.scripting {
646				p.parseGenericRawTextElement()
647				return true
648			}
649			p.addElement()
650			p.im = inHeadNoscriptIM
651			// Don't let the tokenizer go into raw text mode when scripting is disabled.
652			p.tokenizer.NextIsNotRawText()
653			return true
654		case a.Script, a.Title:
655			p.addElement()
656			p.setOriginalIM()
657			p.im = textIM
658			return true
659		case a.Noframes, a.Style:
660			p.parseGenericRawTextElement()
661			return true
662		case a.Head:
663			// Ignore the token.
664			return true
665		case a.Template:
666			// TODO: remove this divergence from the HTML5 spec.
667			//
668			// We don't handle all of the corner cases when mixing foreign
669			// content (i.e. <math> or <svg>) with <template>. Without this
670			// early return, we can get into an infinite loop, possibly because
671			// of the "TODO... further divergence" a little below.
672			//
673			// As a workaround, if we are mixing foreign content and templates,
674			// just ignore the rest of the HTML. Foreign content is rare and a
675			// relatively old HTML feature. Templates are also rare and a
676			// relatively new HTML feature. Their combination is very rare.
677			for _, e := range p.oe {
678				if e.Namespace != "" {
679					p.im = ignoreTheRemainingTokens
680					return true
681				}
682			}
683
684			p.addElement()
685			p.afe = append(p.afe, &scopeMarker)
686			p.framesetOK = false
687			p.im = inTemplateIM
688			p.templateStack = append(p.templateStack, inTemplateIM)
689			return true
690		}
691	case EndTagToken:
692		switch p.tok.DataAtom {
693		case a.Head:
694			p.oe.pop()
695			p.im = afterHeadIM
696			return true
697		case a.Body, a.Html, a.Br:
698			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
699			return false
700		case a.Template:
701			if !p.oe.contains(a.Template) {
702				return true
703			}
704			// TODO: remove this further divergence from the HTML5 spec.
705			//
706			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
707			p.generateImpliedEndTags()
708			for i := len(p.oe) - 1; i >= 0; i-- {
709				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
710					p.oe = p.oe[:i]
711					break
712				}
713			}
714			p.clearActiveFormattingElements()
715			p.templateStack.pop()
716			p.resetInsertionMode()
717			return true
718		default:
719			// Ignore the token.
720			return true
721		}
722	case CommentToken:
723		p.addChild(&Node{
724			Type: CommentNode,
725			Data: p.tok.Data,
726		})
727		return true
728	case DoctypeToken:
729		// Ignore the token.
730		return true
731	}
732
733	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
734	return false
735}
736
737// 12.2.6.4.5.
738func inHeadNoscriptIM(p *parser) bool {
739	switch p.tok.Type {
740	case DoctypeToken:
741		// Ignore the token.
742		return true
743	case StartTagToken:
744		switch p.tok.DataAtom {
745		case a.Html:
746			return inBodyIM(p)
747		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
748			return inHeadIM(p)
749		case a.Head:
750			// Ignore the token.
751			return true
752		case a.Noscript:
753			// Don't let the tokenizer go into raw text mode even when a <noscript>
754			// tag is in "in head noscript" insertion mode.
755			p.tokenizer.NextIsNotRawText()
756			// Ignore the token.
757			return true
758		}
759	case EndTagToken:
760		switch p.tok.DataAtom {
761		case a.Noscript, a.Br:
762		default:
763			// Ignore the token.
764			return true
765		}
766	case TextToken:
767		s := strings.TrimLeft(p.tok.Data, whitespace)
768		if len(s) == 0 {
769			// It was all whitespace.
770			return inHeadIM(p)
771		}
772	case CommentToken:
773		return inHeadIM(p)
774	}
775	p.oe.pop()
776	if p.top().DataAtom != a.Head {
777		panic("html: the new current node will be a head element.")
778	}
779	p.im = inHeadIM
780	if p.tok.DataAtom == a.Noscript {
781		return true
782	}
783	return false
784}
785
786// Section 12.2.6.4.6.
787func afterHeadIM(p *parser) bool {
788	switch p.tok.Type {
789	case TextToken:
790		s := strings.TrimLeft(p.tok.Data, whitespace)
791		if len(s) < len(p.tok.Data) {
792			// Add the initial whitespace to the current node.
793			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
794			if s == "" {
795				return true
796			}
797			p.tok.Data = s
798		}
799	case StartTagToken:
800		switch p.tok.DataAtom {
801		case a.Html:
802			return inBodyIM(p)
803		case a.Body:
804			p.addElement()
805			p.framesetOK = false
806			p.im = inBodyIM
807			return true
808		case a.Frameset:
809			p.addElement()
810			p.im = inFramesetIM
811			return true
812		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
813			p.oe = append(p.oe, p.head)
814			defer p.oe.remove(p.head)
815			return inHeadIM(p)
816		case a.Head:
817			// Ignore the token.
818			return true
819		}
820	case EndTagToken:
821		switch p.tok.DataAtom {
822		case a.Body, a.Html, a.Br:
823			// Drop down to creating an implied <body> tag.
824		case a.Template:
825			return inHeadIM(p)
826		default:
827			// Ignore the token.
828			return true
829		}
830	case CommentToken:
831		p.addChild(&Node{
832			Type: CommentNode,
833			Data: p.tok.Data,
834		})
835		return true
836	case DoctypeToken:
837		// Ignore the token.
838		return true
839	}
840
841	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
842	p.framesetOK = true
843	return false
844}
845
846// copyAttributes copies attributes of src not found on dst to dst.
847func copyAttributes(dst *Node, src Token) {
848	if len(src.Attr) == 0 {
849		return
850	}
851	attr := map[string]string{}
852	for _, t := range dst.Attr {
853		attr[t.Key] = t.Val
854	}
855	for _, t := range src.Attr {
856		if _, ok := attr[t.Key]; !ok {
857			dst.Attr = append(dst.Attr, t)
858			attr[t.Key] = t.Val
859		}
860	}
861}
862
863// Section 12.2.6.4.7.
864func inBodyIM(p *parser) bool {
865	switch p.tok.Type {
866	case TextToken:
867		d := p.tok.Data
868		switch n := p.oe.top(); n.DataAtom {
869		case a.Pre, a.Listing:
870			if n.FirstChild == nil {
871				// Ignore a newline at the start of a <pre> block.
872				if d != "" && d[0] == '\r' {
873					d = d[1:]
874				}
875				if d != "" && d[0] == '\n' {
876					d = d[1:]
877				}
878			}
879		}
880		d = strings.Replace(d, "\x00", "", -1)
881		if d == "" {
882			return true
883		}
884		p.reconstructActiveFormattingElements()
885		p.addText(d)
886		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
887			// There were non-whitespace characters inserted.
888			p.framesetOK = false
889		}
890	case StartTagToken:
891		switch p.tok.DataAtom {
892		case a.Html:
893			if p.oe.contains(a.Template) {
894				return true
895			}
896			copyAttributes(p.oe[0], p.tok)
897		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
898			return inHeadIM(p)
899		case a.Body:
900			if p.oe.contains(a.Template) {
901				return true
902			}
903			if len(p.oe) >= 2 {
904				body := p.oe[1]
905				if body.Type == ElementNode && body.DataAtom == a.Body {
906					p.framesetOK = false
907					copyAttributes(body, p.tok)
908				}
909			}
910		case a.Frameset:
911			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
912				// Ignore the token.
913				return true
914			}
915			body := p.oe[1]
916			if body.Parent != nil {
917				body.Parent.RemoveChild(body)
918			}
919			p.oe = p.oe[:1]
920			p.addElement()
921			p.im = inFramesetIM
922			return true
923		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
924			p.popUntil(buttonScope, a.P)
925			p.addElement()
926		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
927			p.popUntil(buttonScope, a.P)
928			switch n := p.top(); n.DataAtom {
929			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
930				p.oe.pop()
931			}
932			p.addElement()
933		case a.Pre, a.Listing:
934			p.popUntil(buttonScope, a.P)
935			p.addElement()
936			// The newline, if any, will be dealt with by the TextToken case.
937			p.framesetOK = false
938		case a.Form:
939			if p.form != nil && !p.oe.contains(a.Template) {
940				// Ignore the token
941				return true
942			}
943			p.popUntil(buttonScope, a.P)
944			p.addElement()
945			if !p.oe.contains(a.Template) {
946				p.form = p.top()
947			}
948		case a.Li:
949			p.framesetOK = false
950			for i := len(p.oe) - 1; i >= 0; i-- {
951				node := p.oe[i]
952				switch node.DataAtom {
953				case a.Li:
954					p.oe = p.oe[:i]
955				case a.Address, a.Div, a.P:
956					continue
957				default:
958					if !isSpecialElement(node) {
959						continue
960					}
961				}
962				break
963			}
964			p.popUntil(buttonScope, a.P)
965			p.addElement()
966		case a.Dd, a.Dt:
967			p.framesetOK = false
968			for i := len(p.oe) - 1; i >= 0; i-- {
969				node := p.oe[i]
970				switch node.DataAtom {
971				case a.Dd, a.Dt:
972					p.oe = p.oe[:i]
973				case a.Address, a.Div, a.P:
974					continue
975				default:
976					if !isSpecialElement(node) {
977						continue
978					}
979				}
980				break
981			}
982			p.popUntil(buttonScope, a.P)
983			p.addElement()
984		case a.Plaintext:
985			p.popUntil(buttonScope, a.P)
986			p.addElement()
987		case a.Button:
988			p.popUntil(defaultScope, a.Button)
989			p.reconstructActiveFormattingElements()
990			p.addElement()
991			p.framesetOK = false
992		case a.A:
993			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
994				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
995					p.inBodyEndTagFormatting(a.A, "a")
996					p.oe.remove(n)
997					p.afe.remove(n)
998					break
999				}
1000			}
1001			p.reconstructActiveFormattingElements()
1002			p.addFormattingElement()
1003		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1004			p.reconstructActiveFormattingElements()
1005			p.addFormattingElement()
1006		case a.Nobr:
1007			p.reconstructActiveFormattingElements()
1008			if p.elementInScope(defaultScope, a.Nobr) {
1009				p.inBodyEndTagFormatting(a.Nobr, "nobr")
1010				p.reconstructActiveFormattingElements()
1011			}
1012			p.addFormattingElement()
1013		case a.Applet, a.Marquee, a.Object:
1014			p.reconstructActiveFormattingElements()
1015			p.addElement()
1016			p.afe = append(p.afe, &scopeMarker)
1017			p.framesetOK = false
1018		case a.Table:
1019			if !p.quirks {
1020				p.popUntil(buttonScope, a.P)
1021			}
1022			p.addElement()
1023			p.framesetOK = false
1024			p.im = inTableIM
1025			return true
1026		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1027			p.reconstructActiveFormattingElements()
1028			p.addElement()
1029			p.oe.pop()
1030			p.acknowledgeSelfClosingTag()
1031			if p.tok.DataAtom == a.Input {
1032				for _, t := range p.tok.Attr {
1033					if t.Key == "type" {
1034						if strings.ToLower(t.Val) == "hidden" {
1035							// Skip setting framesetOK = false
1036							return true
1037						}
1038					}
1039				}
1040			}
1041			p.framesetOK = false
1042		case a.Param, a.Source, a.Track:
1043			p.addElement()
1044			p.oe.pop()
1045			p.acknowledgeSelfClosingTag()
1046		case a.Hr:
1047			p.popUntil(buttonScope, a.P)
1048			p.addElement()
1049			p.oe.pop()
1050			p.acknowledgeSelfClosingTag()
1051			p.framesetOK = false
1052		case a.Image:
1053			p.tok.DataAtom = a.Img
1054			p.tok.Data = a.Img.String()
1055			return false
1056		case a.Textarea:
1057			p.addElement()
1058			p.setOriginalIM()
1059			p.framesetOK = false
1060			p.im = textIM
1061		case a.Xmp:
1062			p.popUntil(buttonScope, a.P)
1063			p.reconstructActiveFormattingElements()
1064			p.framesetOK = false
1065			p.parseGenericRawTextElement()
1066		case a.Iframe:
1067			p.framesetOK = false
1068			p.parseGenericRawTextElement()
1069		case a.Noembed:
1070			p.parseGenericRawTextElement()
1071		case a.Noscript:
1072			if p.scripting {
1073				p.parseGenericRawTextElement()
1074				return true
1075			}
1076			p.reconstructActiveFormattingElements()
1077			p.addElement()
1078			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1079			p.tokenizer.NextIsNotRawText()
1080		case a.Select:
1081			p.reconstructActiveFormattingElements()
1082			p.addElement()
1083			p.framesetOK = false
1084			p.im = inSelectIM
1085			return true
1086		case a.Optgroup, a.Option:
1087			if p.top().DataAtom == a.Option {
1088				p.oe.pop()
1089			}
1090			p.reconstructActiveFormattingElements()
1091			p.addElement()
1092		case a.Rb, a.Rtc:
1093			if p.elementInScope(defaultScope, a.Ruby) {
1094				p.generateImpliedEndTags()
1095			}
1096			p.addElement()
1097		case a.Rp, a.Rt:
1098			if p.elementInScope(defaultScope, a.Ruby) {
1099				p.generateImpliedEndTags("rtc")
1100			}
1101			p.addElement()
1102		case a.Math, a.Svg:
1103			p.reconstructActiveFormattingElements()
1104			if p.tok.DataAtom == a.Math {
1105				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1106			} else {
1107				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1108			}
1109			adjustForeignAttributes(p.tok.Attr)
1110			p.addElement()
1111			p.top().Namespace = p.tok.Data
1112			if p.hasSelfClosingToken {
1113				p.oe.pop()
1114				p.acknowledgeSelfClosingTag()
1115			}
1116			return true
1117		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1118			// Ignore the token.
1119		default:
1120			p.reconstructActiveFormattingElements()
1121			p.addElement()
1122		}
1123	case EndTagToken:
1124		switch p.tok.DataAtom {
1125		case a.Body:
1126			if p.elementInScope(defaultScope, a.Body) {
1127				p.im = afterBodyIM
1128			}
1129		case a.Html:
1130			if p.elementInScope(defaultScope, a.Body) {
1131				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1132				return false
1133			}
1134			return true
1135		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1136			p.popUntil(defaultScope, p.tok.DataAtom)
1137		case a.Form:
1138			if p.oe.contains(a.Template) {
1139				i := p.indexOfElementInScope(defaultScope, a.Form)
1140				if i == -1 {
1141					// Ignore the token.
1142					return true
1143				}
1144				p.generateImpliedEndTags()
1145				if p.oe[i].DataAtom != a.Form {
1146					// Ignore the token.
1147					return true
1148				}
1149				p.popUntil(defaultScope, a.Form)
1150			} else {
1151				node := p.form
1152				p.form = nil
1153				i := p.indexOfElementInScope(defaultScope, a.Form)
1154				if node == nil || i == -1 || p.oe[i] != node {
1155					// Ignore the token.
1156					return true
1157				}
1158				p.generateImpliedEndTags()
1159				p.oe.remove(node)
1160			}
1161		case a.P:
1162			if !p.elementInScope(buttonScope, a.P) {
1163				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1164			}
1165			p.popUntil(buttonScope, a.P)
1166		case a.Li:
1167			p.popUntil(listItemScope, a.Li)
1168		case a.Dd, a.Dt:
1169			p.popUntil(defaultScope, p.tok.DataAtom)
1170		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1171			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1172		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1173			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1174		case a.Applet, a.Marquee, a.Object:
1175			if p.popUntil(defaultScope, p.tok.DataAtom) {
1176				p.clearActiveFormattingElements()
1177			}
1178		case a.Br:
1179			p.tok.Type = StartTagToken
1180			return false
1181		case a.Template:
1182			return inHeadIM(p)
1183		default:
1184			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1185		}
1186	case CommentToken:
1187		p.addChild(&Node{
1188			Type: CommentNode,
1189			Data: p.tok.Data,
1190		})
1191	case ErrorToken:
1192		// TODO: remove this divergence from the HTML5 spec.
1193		if len(p.templateStack) > 0 {
1194			p.im = inTemplateIM
1195			return false
1196		}
1197		for _, e := range p.oe {
1198			switch e.DataAtom {
1199			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1200				a.Thead, a.Tr, a.Body, a.Html:
1201			default:
1202				return true
1203			}
1204		}
1205	}
1206
1207	return true
1208}
1209
// inBodyEndTagFormatting handles an end tag for a formatting element while in
// the in-body insertion mode. tagAtom and tagName are the atom and the name of
// the end tag being processed.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the requested tag name and is
	// not in the list of active formatting elements, so it is simply popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop. It runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the most recent entry in the
		// list of active formatting elements with the requested atom, searching
		// backwards and stopping at the first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such element: treat the token as "any other end tag".
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element at or
		// after the formatting element's position in the stack of open elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop the stack up to and including the
			// formatting element and drop it from the active formatting list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry just below the formatting
		// element; bookmark is an index into the active formatting list.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		// x walks down the stack of open elements from the furthest block
		// towards the formatting element.
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step. 14.3.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone of itself in both the active
			// formatting list and the stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first reparenting, move the bookmark to just
			// after the new node in the active formatting list.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Detach lastNode from its current parent, if any, and
			// append it to node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes the slot
		// immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1362
1363// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1364// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1365// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1366func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1367	for i := len(p.oe) - 1; i >= 0; i-- {
1368		// Two element nodes have the same tag if they have the same Data (a
1369		// string-typed field). As an optimization, for common HTML tags, each
1370		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1371		// field), since integer comparison is faster than string comparison.
1372		// Uncommon (custom) tags get a zero DataAtom.
1373		//
1374		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1375		if (p.oe[i].DataAtom == tagAtom) &&
1376			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1377			p.oe = p.oe[:i]
1378			break
1379		}
1380		if isSpecialElement(p.oe[i]) {
1381			break
1382		}
1383	}
1384}
1385
1386// Section 12.2.6.4.8.
1387func textIM(p *parser) bool {
1388	switch p.tok.Type {
1389	case ErrorToken:
1390		p.oe.pop()
1391	case TextToken:
1392		d := p.tok.Data
1393		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1394			// Ignore a newline at the start of a <textarea> block.
1395			if d != "" && d[0] == '\r' {
1396				d = d[1:]
1397			}
1398			if d != "" && d[0] == '\n' {
1399				d = d[1:]
1400			}
1401		}
1402		if d == "" {
1403			return true
1404		}
1405		p.addText(d)
1406		return true
1407	case EndTagToken:
1408		p.oe.pop()
1409	}
1410	p.im = p.originalIM
1411	p.originalIM = nil
1412	return p.tok.Type == EndTagToken
1413}
1414
1415// Section 12.2.6.4.9.
1416func inTableIM(p *parser) bool {
1417	switch p.tok.Type {
1418	case TextToken:
1419		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1420		switch p.oe.top().DataAtom {
1421		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1422			if strings.Trim(p.tok.Data, whitespace) == "" {
1423				p.addText(p.tok.Data)
1424				return true
1425			}
1426		}
1427	case StartTagToken:
1428		switch p.tok.DataAtom {
1429		case a.Caption:
1430			p.clearStackToContext(tableScope)
1431			p.afe = append(p.afe, &scopeMarker)
1432			p.addElement()
1433			p.im = inCaptionIM
1434			return true
1435		case a.Colgroup:
1436			p.clearStackToContext(tableScope)
1437			p.addElement()
1438			p.im = inColumnGroupIM
1439			return true
1440		case a.Col:
1441			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1442			return false
1443		case a.Tbody, a.Tfoot, a.Thead:
1444			p.clearStackToContext(tableScope)
1445			p.addElement()
1446			p.im = inTableBodyIM
1447			return true
1448		case a.Td, a.Th, a.Tr:
1449			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1450			return false
1451		case a.Table:
1452			if p.popUntil(tableScope, a.Table) {
1453				p.resetInsertionMode()
1454				return false
1455			}
1456			// Ignore the token.
1457			return true
1458		case a.Style, a.Script, a.Template:
1459			return inHeadIM(p)
1460		case a.Input:
1461			for _, t := range p.tok.Attr {
1462				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1463					p.addElement()
1464					p.oe.pop()
1465					return true
1466				}
1467			}
1468			// Otherwise drop down to the default action.
1469		case a.Form:
1470			if p.oe.contains(a.Template) || p.form != nil {
1471				// Ignore the token.
1472				return true
1473			}
1474			p.addElement()
1475			p.form = p.oe.pop()
1476		case a.Select:
1477			p.reconstructActiveFormattingElements()
1478			switch p.top().DataAtom {
1479			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1480				p.fosterParenting = true
1481			}
1482			p.addElement()
1483			p.fosterParenting = false
1484			p.framesetOK = false
1485			p.im = inSelectInTableIM
1486			return true
1487		}
1488	case EndTagToken:
1489		switch p.tok.DataAtom {
1490		case a.Table:
1491			if p.popUntil(tableScope, a.Table) {
1492				p.resetInsertionMode()
1493				return true
1494			}
1495			// Ignore the token.
1496			return true
1497		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1498			// Ignore the token.
1499			return true
1500		case a.Template:
1501			return inHeadIM(p)
1502		}
1503	case CommentToken:
1504		p.addChild(&Node{
1505			Type: CommentNode,
1506			Data: p.tok.Data,
1507		})
1508		return true
1509	case DoctypeToken:
1510		// Ignore the token.
1511		return true
1512	case ErrorToken:
1513		return inBodyIM(p)
1514	}
1515
1516	p.fosterParenting = true
1517	defer func() { p.fosterParenting = false }()
1518
1519	return inBodyIM(p)
1520}
1521
1522// Section 12.2.6.4.11.
1523func inCaptionIM(p *parser) bool {
1524	switch p.tok.Type {
1525	case StartTagToken:
1526		switch p.tok.DataAtom {
1527		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1528			if !p.popUntil(tableScope, a.Caption) {
1529				// Ignore the token.
1530				return true
1531			}
1532			p.clearActiveFormattingElements()
1533			p.im = inTableIM
1534			return false
1535		case a.Select:
1536			p.reconstructActiveFormattingElements()
1537			p.addElement()
1538			p.framesetOK = false
1539			p.im = inSelectInTableIM
1540			return true
1541		}
1542	case EndTagToken:
1543		switch p.tok.DataAtom {
1544		case a.Caption:
1545			if p.popUntil(tableScope, a.Caption) {
1546				p.clearActiveFormattingElements()
1547				p.im = inTableIM
1548			}
1549			return true
1550		case a.Table:
1551			if !p.popUntil(tableScope, a.Caption) {
1552				// Ignore the token.
1553				return true
1554			}
1555			p.clearActiveFormattingElements()
1556			p.im = inTableIM
1557			return false
1558		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1559			// Ignore the token.
1560			return true
1561		}
1562	}
1563	return inBodyIM(p)
1564}
1565
1566// Section 12.2.6.4.12.
1567func inColumnGroupIM(p *parser) bool {
1568	switch p.tok.Type {
1569	case TextToken:
1570		s := strings.TrimLeft(p.tok.Data, whitespace)
1571		if len(s) < len(p.tok.Data) {
1572			// Add the initial whitespace to the current node.
1573			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1574			if s == "" {
1575				return true
1576			}
1577			p.tok.Data = s
1578		}
1579	case CommentToken:
1580		p.addChild(&Node{
1581			Type: CommentNode,
1582			Data: p.tok.Data,
1583		})
1584		return true
1585	case DoctypeToken:
1586		// Ignore the token.
1587		return true
1588	case StartTagToken:
1589		switch p.tok.DataAtom {
1590		case a.Html:
1591			return inBodyIM(p)
1592		case a.Col:
1593			p.addElement()
1594			p.oe.pop()
1595			p.acknowledgeSelfClosingTag()
1596			return true
1597		case a.Template:
1598			return inHeadIM(p)
1599		}
1600	case EndTagToken:
1601		switch p.tok.DataAtom {
1602		case a.Colgroup:
1603			if p.oe.top().DataAtom == a.Colgroup {
1604				p.oe.pop()
1605				p.im = inTableIM
1606			}
1607			return true
1608		case a.Col:
1609			// Ignore the token.
1610			return true
1611		case a.Template:
1612			return inHeadIM(p)
1613		}
1614	case ErrorToken:
1615		return inBodyIM(p)
1616	}
1617	if p.oe.top().DataAtom != a.Colgroup {
1618		return true
1619	}
1620	p.oe.pop()
1621	p.im = inTableIM
1622	return false
1623}
1624
1625// Section 12.2.6.4.13.
1626func inTableBodyIM(p *parser) bool {
1627	switch p.tok.Type {
1628	case StartTagToken:
1629		switch p.tok.DataAtom {
1630		case a.Tr:
1631			p.clearStackToContext(tableBodyScope)
1632			p.addElement()
1633			p.im = inRowIM
1634			return true
1635		case a.Td, a.Th:
1636			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1637			return false
1638		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1639			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1640				p.im = inTableIM
1641				return false
1642			}
1643			// Ignore the token.
1644			return true
1645		}
1646	case EndTagToken:
1647		switch p.tok.DataAtom {
1648		case a.Tbody, a.Tfoot, a.Thead:
1649			if p.elementInScope(tableScope, p.tok.DataAtom) {
1650				p.clearStackToContext(tableBodyScope)
1651				p.oe.pop()
1652				p.im = inTableIM
1653			}
1654			return true
1655		case a.Table:
1656			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1657				p.im = inTableIM
1658				return false
1659			}
1660			// Ignore the token.
1661			return true
1662		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1663			// Ignore the token.
1664			return true
1665		}
1666	case CommentToken:
1667		p.addChild(&Node{
1668			Type: CommentNode,
1669			Data: p.tok.Data,
1670		})
1671		return true
1672	}
1673
1674	return inTableIM(p)
1675}
1676
1677// Section 12.2.6.4.14.
1678func inRowIM(p *parser) bool {
1679	switch p.tok.Type {
1680	case StartTagToken:
1681		switch p.tok.DataAtom {
1682		case a.Td, a.Th:
1683			p.clearStackToContext(tableRowScope)
1684			p.addElement()
1685			p.afe = append(p.afe, &scopeMarker)
1686			p.im = inCellIM
1687			return true
1688		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1689			if p.popUntil(tableScope, a.Tr) {
1690				p.im = inTableBodyIM
1691				return false
1692			}
1693			// Ignore the token.
1694			return true
1695		}
1696	case EndTagToken:
1697		switch p.tok.DataAtom {
1698		case a.Tr:
1699			if p.popUntil(tableScope, a.Tr) {
1700				p.im = inTableBodyIM
1701				return true
1702			}
1703			// Ignore the token.
1704			return true
1705		case a.Table:
1706			if p.popUntil(tableScope, a.Tr) {
1707				p.im = inTableBodyIM
1708				return false
1709			}
1710			// Ignore the token.
1711			return true
1712		case a.Tbody, a.Tfoot, a.Thead:
1713			if p.elementInScope(tableScope, p.tok.DataAtom) {
1714				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1715				return false
1716			}
1717			// Ignore the token.
1718			return true
1719		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1720			// Ignore the token.
1721			return true
1722		}
1723	}
1724
1725	return inTableIM(p)
1726}
1727
1728// Section 12.2.6.4.15.
1729func inCellIM(p *parser) bool {
1730	switch p.tok.Type {
1731	case StartTagToken:
1732		switch p.tok.DataAtom {
1733		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1734			if p.popUntil(tableScope, a.Td, a.Th) {
1735				// Close the cell and reprocess.
1736				p.clearActiveFormattingElements()
1737				p.im = inRowIM
1738				return false
1739			}
1740			// Ignore the token.
1741			return true
1742		case a.Select:
1743			p.reconstructActiveFormattingElements()
1744			p.addElement()
1745			p.framesetOK = false
1746			p.im = inSelectInTableIM
1747			return true
1748		}
1749	case EndTagToken:
1750		switch p.tok.DataAtom {
1751		case a.Td, a.Th:
1752			if !p.popUntil(tableScope, p.tok.DataAtom) {
1753				// Ignore the token.
1754				return true
1755			}
1756			p.clearActiveFormattingElements()
1757			p.im = inRowIM
1758			return true
1759		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1760			// Ignore the token.
1761			return true
1762		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1763			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1764				// Ignore the token.
1765				return true
1766			}
1767			// Close the cell and reprocess.
1768			if p.popUntil(tableScope, a.Td, a.Th) {
1769				p.clearActiveFormattingElements()
1770			}
1771			p.im = inRowIM
1772			return false
1773		}
1774	}
1775	return inBodyIM(p)
1776}
1777
1778// Section 12.2.6.4.16.
1779func inSelectIM(p *parser) bool {
1780	switch p.tok.Type {
1781	case TextToken:
1782		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1783	case StartTagToken:
1784		switch p.tok.DataAtom {
1785		case a.Html:
1786			return inBodyIM(p)
1787		case a.Option:
1788			if p.top().DataAtom == a.Option {
1789				p.oe.pop()
1790			}
1791			p.addElement()
1792		case a.Optgroup:
1793			if p.top().DataAtom == a.Option {
1794				p.oe.pop()
1795			}
1796			if p.top().DataAtom == a.Optgroup {
1797				p.oe.pop()
1798			}
1799			p.addElement()
1800		case a.Select:
1801			if !p.popUntil(selectScope, a.Select) {
1802				// Ignore the token.
1803				return true
1804			}
1805			p.resetInsertionMode()
1806		case a.Input, a.Keygen, a.Textarea:
1807			if p.elementInScope(selectScope, a.Select) {
1808				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1809				return false
1810			}
1811			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1812			p.tokenizer.NextIsNotRawText()
1813			// Ignore the token.
1814			return true
1815		case a.Script, a.Template:
1816			return inHeadIM(p)
1817		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1818			// Don't let the tokenizer go into raw text mode when there are raw tags
1819			// to be ignored. These tags should be ignored from the tokenizer
1820			// properly.
1821			p.tokenizer.NextIsNotRawText()
1822			// Ignore the token.
1823			return true
1824		}
1825	case EndTagToken:
1826		switch p.tok.DataAtom {
1827		case a.Option:
1828			if p.top().DataAtom == a.Option {
1829				p.oe.pop()
1830			}
1831		case a.Optgroup:
1832			i := len(p.oe) - 1
1833			if p.oe[i].DataAtom == a.Option {
1834				i--
1835			}
1836			if p.oe[i].DataAtom == a.Optgroup {
1837				p.oe = p.oe[:i]
1838			}
1839		case a.Select:
1840			if !p.popUntil(selectScope, a.Select) {
1841				// Ignore the token.
1842				return true
1843			}
1844			p.resetInsertionMode()
1845		case a.Template:
1846			return inHeadIM(p)
1847		}
1848	case CommentToken:
1849		p.addChild(&Node{
1850			Type: CommentNode,
1851			Data: p.tok.Data,
1852		})
1853	case DoctypeToken:
1854		// Ignore the token.
1855		return true
1856	case ErrorToken:
1857		return inBodyIM(p)
1858	}
1859
1860	return true
1861}
1862
1863// Section 12.2.6.4.17.
1864func inSelectInTableIM(p *parser) bool {
1865	switch p.tok.Type {
1866	case StartTagToken, EndTagToken:
1867		switch p.tok.DataAtom {
1868		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1869			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1870				// Ignore the token.
1871				return true
1872			}
1873			// This is like p.popUntil(selectScope, a.Select), but it also
1874			// matches <math select>, not just <select>. Matching the MathML
1875			// tag is arguably incorrect (conceptually), but it mimics what
1876			// Chromium does.
1877			for i := len(p.oe) - 1; i >= 0; i-- {
1878				if n := p.oe[i]; n.DataAtom == a.Select {
1879					p.oe = p.oe[:i]
1880					break
1881				}
1882			}
1883			p.resetInsertionMode()
1884			return false
1885		}
1886	}
1887	return inSelectIM(p)
1888}
1889
1890// Section 12.2.6.4.18.
1891func inTemplateIM(p *parser) bool {
1892	switch p.tok.Type {
1893	case TextToken, CommentToken, DoctypeToken:
1894		return inBodyIM(p)
1895	case StartTagToken:
1896		switch p.tok.DataAtom {
1897		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1898			return inHeadIM(p)
1899		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1900			p.templateStack.pop()
1901			p.templateStack = append(p.templateStack, inTableIM)
1902			p.im = inTableIM
1903			return false
1904		case a.Col:
1905			p.templateStack.pop()
1906			p.templateStack = append(p.templateStack, inColumnGroupIM)
1907			p.im = inColumnGroupIM
1908			return false
1909		case a.Tr:
1910			p.templateStack.pop()
1911			p.templateStack = append(p.templateStack, inTableBodyIM)
1912			p.im = inTableBodyIM
1913			return false
1914		case a.Td, a.Th:
1915			p.templateStack.pop()
1916			p.templateStack = append(p.templateStack, inRowIM)
1917			p.im = inRowIM
1918			return false
1919		default:
1920			p.templateStack.pop()
1921			p.templateStack = append(p.templateStack, inBodyIM)
1922			p.im = inBodyIM
1923			return false
1924		}
1925	case EndTagToken:
1926		switch p.tok.DataAtom {
1927		case a.Template:
1928			return inHeadIM(p)
1929		default:
1930			// Ignore the token.
1931			return true
1932		}
1933	case ErrorToken:
1934		if !p.oe.contains(a.Template) {
1935			// Ignore the token.
1936			return true
1937		}
1938		// TODO: remove this divergence from the HTML5 spec.
1939		//
1940		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1941		p.generateImpliedEndTags()
1942		for i := len(p.oe) - 1; i >= 0; i-- {
1943			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1944				p.oe = p.oe[:i]
1945				break
1946			}
1947		}
1948		p.clearActiveFormattingElements()
1949		p.templateStack.pop()
1950		p.resetInsertionMode()
1951		return false
1952	}
1953	return false
1954}
1955
1956// Section 12.2.6.4.19.
1957func afterBodyIM(p *parser) bool {
1958	switch p.tok.Type {
1959	case ErrorToken:
1960		// Stop parsing.
1961		return true
1962	case TextToken:
1963		s := strings.TrimLeft(p.tok.Data, whitespace)
1964		if len(s) == 0 {
1965			// It was all whitespace.
1966			return inBodyIM(p)
1967		}
1968	case StartTagToken:
1969		if p.tok.DataAtom == a.Html {
1970			return inBodyIM(p)
1971		}
1972	case EndTagToken:
1973		if p.tok.DataAtom == a.Html {
1974			if !p.fragment {
1975				p.im = afterAfterBodyIM
1976			}
1977			return true
1978		}
1979	case CommentToken:
1980		// The comment is attached to the <html> element.
1981		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1982			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1983		}
1984		p.oe[0].AppendChild(&Node{
1985			Type: CommentNode,
1986			Data: p.tok.Data,
1987		})
1988		return true
1989	}
1990	p.im = inBodyIM
1991	return false
1992}
1993
1994// Section 12.2.6.4.20.
1995func inFramesetIM(p *parser) bool {
1996	switch p.tok.Type {
1997	case CommentToken:
1998		p.addChild(&Node{
1999			Type: CommentNode,
2000			Data: p.tok.Data,
2001		})
2002	case TextToken:
2003		// Ignore all text but whitespace.
2004		s := strings.Map(func(c rune) rune {
2005			switch c {
2006			case ' ', '\t', '\n', '\f', '\r':
2007				return c
2008			}
2009			return -1
2010		}, p.tok.Data)
2011		if s != "" {
2012			p.addText(s)
2013		}
2014	case StartTagToken:
2015		switch p.tok.DataAtom {
2016		case a.Html:
2017			return inBodyIM(p)
2018		case a.Frameset:
2019			p.addElement()
2020		case a.Frame:
2021			p.addElement()
2022			p.oe.pop()
2023			p.acknowledgeSelfClosingTag()
2024		case a.Noframes:
2025			return inHeadIM(p)
2026		}
2027	case EndTagToken:
2028		switch p.tok.DataAtom {
2029		case a.Frameset:
2030			if p.oe.top().DataAtom != a.Html {
2031				p.oe.pop()
2032				if p.oe.top().DataAtom != a.Frameset {
2033					p.im = afterFramesetIM
2034					return true
2035				}
2036			}
2037		}
2038	default:
2039		// Ignore the token.
2040	}
2041	return true
2042}
2043
2044// Section 12.2.6.4.21.
2045func afterFramesetIM(p *parser) bool {
2046	switch p.tok.Type {
2047	case CommentToken:
2048		p.addChild(&Node{
2049			Type: CommentNode,
2050			Data: p.tok.Data,
2051		})
2052	case TextToken:
2053		// Ignore all text but whitespace.
2054		s := strings.Map(func(c rune) rune {
2055			switch c {
2056			case ' ', '\t', '\n', '\f', '\r':
2057				return c
2058			}
2059			return -1
2060		}, p.tok.Data)
2061		if s != "" {
2062			p.addText(s)
2063		}
2064	case StartTagToken:
2065		switch p.tok.DataAtom {
2066		case a.Html:
2067			return inBodyIM(p)
2068		case a.Noframes:
2069			return inHeadIM(p)
2070		}
2071	case EndTagToken:
2072		switch p.tok.DataAtom {
2073		case a.Html:
2074			p.im = afterAfterFramesetIM
2075			return true
2076		}
2077	default:
2078		// Ignore the token.
2079	}
2080	return true
2081}
2082
2083// Section 12.2.6.4.22.
2084func afterAfterBodyIM(p *parser) bool {
2085	switch p.tok.Type {
2086	case ErrorToken:
2087		// Stop parsing.
2088		return true
2089	case TextToken:
2090		s := strings.TrimLeft(p.tok.Data, whitespace)
2091		if len(s) == 0 {
2092			// It was all whitespace.
2093			return inBodyIM(p)
2094		}
2095	case StartTagToken:
2096		if p.tok.DataAtom == a.Html {
2097			return inBodyIM(p)
2098		}
2099	case CommentToken:
2100		p.doc.AppendChild(&Node{
2101			Type: CommentNode,
2102			Data: p.tok.Data,
2103		})
2104		return true
2105	case DoctypeToken:
2106		return inBodyIM(p)
2107	}
2108	p.im = inBodyIM
2109	return false
2110}
2111
2112// Section 12.2.6.4.23.
2113func afterAfterFramesetIM(p *parser) bool {
2114	switch p.tok.Type {
2115	case CommentToken:
2116		p.doc.AppendChild(&Node{
2117			Type: CommentNode,
2118			Data: p.tok.Data,
2119		})
2120	case TextToken:
2121		// Ignore all text but whitespace.
2122		s := strings.Map(func(c rune) rune {
2123			switch c {
2124			case ' ', '\t', '\n', '\f', '\r':
2125				return c
2126			}
2127			return -1
2128		}, p.tok.Data)
2129		if s != "" {
2130			p.tok.Data = s
2131			return inBodyIM(p)
2132		}
2133	case StartTagToken:
2134		switch p.tok.DataAtom {
2135		case a.Html:
2136			return inBodyIM(p)
2137		case a.Noframes:
2138			return inHeadIM(p)
2139		}
2140	case DoctypeToken:
2141		return inBodyIM(p)
2142	default:
2143		// Ignore the token.
2144	}
2145	return true
2146}
2147
// ignoreTheRemainingTokens has the same signature as the insertion mode
// functions and unconditionally reports the current token as consumed without
// touching the parser.
// NOTE(review): presumably installed as p.im to discard the rest of the
// input; confirm against its callers.
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
2151
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent uses it to decide whether a text token consists solely
// of whitespace and NUL characters.
const whitespaceOrNUL = whitespace + "\x00"
2153
2154// Section 12.2.6.5
2155func parseForeignContent(p *parser) bool {
2156	switch p.tok.Type {
2157	case TextToken:
2158		if p.framesetOK {
2159			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2160		}
2161		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2162		p.addText(p.tok.Data)
2163	case CommentToken:
2164		p.addChild(&Node{
2165			Type: CommentNode,
2166			Data: p.tok.Data,
2167		})
2168	case StartTagToken:
2169		if !p.fragment {
2170			b := breakout[p.tok.Data]
2171			if p.tok.DataAtom == a.Font {
2172			loop:
2173				for _, attr := range p.tok.Attr {
2174					switch attr.Key {
2175					case "color", "face", "size":
2176						b = true
2177						break loop
2178					}
2179				}
2180			}
2181			if b {
2182				for i := len(p.oe) - 1; i >= 0; i-- {
2183					n := p.oe[i]
2184					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2185						p.oe = p.oe[:i+1]
2186						break
2187					}
2188				}
2189				return false
2190			}
2191		}
2192		current := p.adjustedCurrentNode()
2193		switch current.Namespace {
2194		case "math":
2195			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2196		case "svg":
2197			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
2198			// SVG wants e.g. "foreignObject" with a capital second "O".
2199			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2200				p.tok.DataAtom = a.Lookup([]byte(x))
2201				p.tok.Data = x
2202			}
2203			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2204		default:
2205			panic("html: bad parser state: unexpected namespace")
2206		}
2207		adjustForeignAttributes(p.tok.Attr)
2208		namespace := current.Namespace
2209		p.addElement()
2210		p.top().Namespace = namespace
2211		if namespace != "" {
2212			// Don't let the tokenizer go into raw text mode in foreign content
2213			// (e.g. in an SVG <title> tag).
2214			p.tokenizer.NextIsNotRawText()
2215		}
2216		if p.hasSelfClosingToken {
2217			p.oe.pop()
2218			p.acknowledgeSelfClosingTag()
2219		}
2220	case EndTagToken:
2221		for i := len(p.oe) - 1; i >= 0; i-- {
2222			if p.oe[i].Namespace == "" {
2223				return p.im(p)
2224			}
2225			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2226				p.oe = p.oe[:i]
2227				break
2228			}
2229		}
2230		return true
2231	default:
2232		// Ignore the token.
2233	}
2234	return true
2235}
2236
2237// Section 12.2.4.2.
2238func (p *parser) adjustedCurrentNode() *Node {
2239	if len(p.oe) == 1 && p.fragment && p.context != nil {
2240		return p.context
2241	}
2242	return p.oe.top()
2243}
2244
2245// Section 12.2.6.
2246func (p *parser) inForeignContent() bool {
2247	if len(p.oe) == 0 {
2248		return false
2249	}
2250	n := p.adjustedCurrentNode()
2251	if n.Namespace == "" {
2252		return false
2253	}
2254	if mathMLTextIntegrationPoint(n) {
2255		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2256			return false
2257		}
2258		if p.tok.Type == TextToken {
2259			return false
2260		}
2261	}
2262	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2263		return false
2264	}
2265	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2266		return false
2267	}
2268	if p.tok.Type == ErrorToken {
2269		return false
2270	}
2271	return true
2272}
2273
2274// parseImpliedToken parses a token as though it had appeared in the parser's
2275// input.
2276func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2277	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2278	p.tok = Token{
2279		Type:     t,
2280		DataAtom: dataAtom,
2281		Data:     data,
2282	}
2283	p.hasSelfClosingToken = false
2284	p.parseCurrentToken()
2285	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2286}
2287
2288// parseCurrentToken runs the current token through the parsing routines
2289// until it is consumed.
2290func (p *parser) parseCurrentToken() {
2291	if p.tok.Type == SelfClosingTagToken {
2292		p.hasSelfClosingToken = true
2293		p.tok.Type = StartTagToken
2294	}
2295
2296	consumed := false
2297	for !consumed {
2298		if p.inForeignContent() {
2299			consumed = parseForeignContent(p)
2300		} else {
2301			consumed = p.im(p)
2302		}
2303	}
2304
2305	if p.hasSelfClosingToken {
2306		// This is a parse error, but ignore it.
2307		p.hasSelfClosingToken = false
2308	}
2309}
2310
2311func (p *parser) parse() error {
2312	// Iterate until EOF. Any other error will cause an early return.
2313	var err error
2314	for err != io.EOF {
2315		// CDATA sections are allowed only in foreign content.
2316		n := p.oe.top()
2317		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2318		// Read and parse the next token.
2319		p.tokenizer.Next()
2320		p.tok = p.tokenizer.Token()
2321		if p.tok.Type == ErrorToken {
2322			err = p.tokenizer.Err()
2323			if err != nil && err != io.EOF {
2324				return err
2325			}
2326		}
2327		p.parseCurrentToken()
2328	}
2329	return nil
2330}
2331
2332// Parse returns the parse tree for the HTML from the given Reader.
2333//
2334// It implements the HTML5 parsing algorithm
2335// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2336// which is very complicated. The resultant tree can contain implicitly created
2337// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2338// differ from the nesting implied by a naive processing of start and end
2339// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2340// with no corresponding node in the resulting tree.
2341//
2342// The input is assumed to be UTF-8 encoded.
2343func Parse(r io.Reader) (*Node, error) {
2344	return ParseWithOptions(r)
2345}
2346
2347// ParseFragment parses a fragment of HTML and returns the nodes that were
2348// found. If the fragment is the InnerHTML for an existing element, pass that
2349// element in context.
2350//
2351// It has the same intricacies as Parse.
2352func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2353	return ParseFragmentWithOptions(r, context)
2354}
2355
// ParseOption configures a parser. ParseWithOptions and
// ParseFragmentWithOptions call each supplied option, in order, on the newly
// constructed parser before parsing starts.
type ParseOption func(p *parser)
2358
2359// ParseOptionEnableScripting configures the scripting flag.
2360// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2361//
2362// By default, scripting is enabled.
2363func ParseOptionEnableScripting(enable bool) ParseOption {
2364	return func(p *parser) {
2365		p.scripting = enable
2366	}
2367}
2368
2369// ParseWithOptions is like Parse, with options.
2370func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2371	p := &parser{
2372		tokenizer: NewTokenizer(r),
2373		doc: &Node{
2374			Type: DocumentNode,
2375		},
2376		scripting:  true,
2377		framesetOK: true,
2378		im:         initialIM,
2379	}
2380
2381	for _, f := range opts {
2382		f(p)
2383	}
2384
2385	if err := p.parse(); err != nil {
2386		return nil, err
2387	}
2388	return p.doc, nil
2389}
2390
2391// ParseFragmentWithOptions is like ParseFragment, with options.
2392func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2393	contextTag := ""
2394	if context != nil {
2395		if context.Type != ElementNode {
2396			return nil, errors.New("html: ParseFragment of non-element Node")
2397		}
2398		// The next check isn't just context.DataAtom.String() == context.Data because
2399		// it is valid to pass an element whose tag isn't a known atom. For example,
2400		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2401		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2402			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2403		}
2404		contextTag = context.DataAtom.String()
2405	}
2406	p := &parser{
2407		doc: &Node{
2408			Type: DocumentNode,
2409		},
2410		scripting: true,
2411		fragment:  true,
2412		context:   context,
2413	}
2414	if context != nil && context.Namespace != "" {
2415		p.tokenizer = NewTokenizer(r)
2416	} else {
2417		p.tokenizer = NewTokenizerFragment(r, contextTag)
2418	}
2419
2420	for _, f := range opts {
2421		f(p)
2422	}
2423
2424	root := &Node{
2425		Type:     ElementNode,
2426		DataAtom: a.Html,
2427		Data:     a.Html.String(),
2428	}
2429	p.doc.AppendChild(root)
2430	p.oe = nodeStack{root}
2431	if context != nil && context.DataAtom == a.Template {
2432		p.templateStack = append(p.templateStack, inTemplateIM)
2433	}
2434	p.resetInsertionMode()
2435
2436	for n := context; n != nil; n = n.Parent {
2437		if n.Type == ElementNode && n.DataAtom == a.Form {
2438			p.form = n
2439			break
2440		}
2441	}
2442
2443	if err := p.parse(); err != nil {
2444		return nil, err
2445	}
2446
2447	parent := p.doc
2448	if context != nil {
2449		parent = root
2450	}
2451
2452	var result []*Node
2453	for c := parent.FirstChild; c != nil; {
2454		next := c.NextSibling
2455		parent.RemoveChild(c)
2456		result = append(result, c)
2457		c = next
2458	}
2459	return result, nil
2460}
2461