1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8	"errors"
9	"fmt"
10	"io"
11	"strings"
12
13	a "golang.org/x/net/html/atom"
14)
15
// A parser implements the HTML5 parsing algorithm:
// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
//
// It bundles the token source, the document being built and all of the
// tree-construction state that the insertion mode functions read and update.
type parser struct {
	// tokenizer provides the tokens for the parser.
	tokenizer *Tokenizer
	// tok is the most recently read token.
	tok Token
	// Self-closing tags like <hr/> are treated as start tags, except that
	// hasSelfClosingToken is set while they are being processed. It is
	// cleared by acknowledgeSelfClosingTag.
	hasSelfClosingToken bool
	// doc is the document root element. It is also what top returns when the
	// stack of open elements is empty.
	doc *Node
	// oe is the stack of open elements (section 12.2.4.2) and afe is the list
	// of active formatting elements (section 12.2.4.3).
	oe, afe nodeStack
	// Element pointers (section 12.2.4.4): head is set when the <head>
	// element is inserted, form when a <form> is inserted outside a template.
	head, form *Node
	// Other parsing state flags (section 12.2.4.5).
	scripting, framesetOK bool
	// templateStack is the stack of template insertion modes; an entry is
	// pushed for every <template> start tag and popped at its end tag.
	templateStack insertionModeStack
	// im is the current insertion mode.
	im insertionMode
	// originalIM is the insertion mode to go back to after completing a text
	// or inTableText insertion mode.
	originalIM insertionMode
	// fosterParenting is whether new elements should be inserted according to
	// the foster parenting rules (section 12.2.6.1).
	fosterParenting bool
	// quirks is whether the parser is operating in "quirks mode."
	quirks bool
	// fragment is whether the parser is parsing an HTML fragment.
	fragment bool
	// context is the context element when parsing an HTML fragment
	// (section 12.4).
	context *Node
}
53
54func (p *parser) top() *Node {
55	if n := p.oe.top(); n != nil {
56		return n
57	}
58	return p.doc
59}
60
// Stop tags for use in popUntil. These come from section 12.2.4.2.
//
// defaultScopeStopTags is keyed by element namespace ("" for elements with no
// namespace). indexOfElementInScope consults it for the default, list item
// and button scopes: reaching one of these tags ends the search.
var (
	defaultScopeStopTags = map[string][]a.Atom{
		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
		"svg":  {a.Desc, a.ForeignObject, a.Title},
	}
)
69
// A scope selects which set of stop tags bounds a search of the stack of
// open elements (section 12.2.4.2).
type scope int

// The known scopes. indexOfElementInScope (and therefore popUntil and
// elementInScope) handles the default, list item, button, table and select
// scopes; clearStackToContext handles the table, table row and table body
// scopes. Passing any other scope to those functions panics.
const (
	defaultScope scope = iota
	listItemScope
	buttonScope
	tableScope
	tableRowScope
	tableBodyScope
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101		p.oe = p.oe[:i]
102		return true
103	}
104	return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111	for i := len(p.oe) - 1; i >= 0; i-- {
112		tagAtom := p.oe[i].DataAtom
113		if p.oe[i].Namespace == "" {
114			for _, t := range matchTags {
115				if t == tagAtom {
116					return i
117				}
118			}
119			switch s {
120			case defaultScope:
121				// No-op.
122			case listItemScope:
123				if tagAtom == a.Ol || tagAtom == a.Ul {
124					return -1
125				}
126			case buttonScope:
127				if tagAtom == a.Button {
128					return -1
129				}
130			case tableScope:
131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132					return -1
133				}
134			case selectScope:
135				if tagAtom != a.Optgroup && tagAtom != a.Option {
136					return -1
137				}
138			default:
139				panic("unreachable")
140			}
141		}
142		switch s {
143		case defaultScope, listItemScope, buttonScope:
144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145				if t == tagAtom {
146					return -1
147				}
148			}
149		}
150	}
151	return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157	return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163	for i := len(p.oe) - 1; i >= 0; i-- {
164		tagAtom := p.oe[i].DataAtom
165		switch s {
166		case tableScope:
167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168				p.oe = p.oe[:i+1]
169				return
170			}
171		case tableRowScope:
172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173				p.oe = p.oe[:i+1]
174				return
175			}
176		case tableBodyScope:
177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178				p.oe = p.oe[:i+1]
179				return
180			}
181		default:
182			panic("unreachable")
183		}
184	}
185}
186
187// parseGenericRawTextElements implements the generic raw text element parsing
188// algorithm defined in 12.2.6.2.
189// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
190// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
191// officially, need to make tokenizer consider both states.
192func (p *parser) parseGenericRawTextElement() {
193	p.addElement()
194	p.originalIM = p.im
195	p.im = textIM
196}
197
198// generateImpliedEndTags pops nodes off the stack of open elements as long as
199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
200// If exceptions are specified, nodes with that name will not be popped off.
201func (p *parser) generateImpliedEndTags(exceptions ...string) {
202	var i int
203loop:
204	for i = len(p.oe) - 1; i >= 0; i-- {
205		n := p.oe[i]
206		if n.Type != ElementNode {
207			break
208		}
209		switch n.DataAtom {
210		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211			for _, except := range exceptions {
212				if n.Data == except {
213					break loop
214				}
215			}
216			continue
217		}
218		break
219	}
220
221	p.oe = p.oe[:i+1]
222}
223
224// addChild adds a child node n to the top element, and pushes n onto the stack
225// of open elements if it is an element node.
226func (p *parser) addChild(n *Node) {
227	if p.shouldFosterParent() {
228		p.fosterParent(n)
229	} else {
230		p.top().AppendChild(n)
231	}
232
233	if n.Type == ElementNode {
234		p.oe = append(p.oe, n)
235	}
236}
237
238// shouldFosterParent returns whether the next node to be added should be
239// foster parented.
240func (p *parser) shouldFosterParent() bool {
241	if p.fosterParenting {
242		switch p.top().DataAtom {
243		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244			return true
245		}
246	}
247	return false
248}
249
250// fosterParent adds a child node according to the foster parenting rules.
251// Section 12.2.6.1, "foster parenting".
252func (p *parser) fosterParent(n *Node) {
253	var table, parent, prev, template *Node
254	var i int
255	for i = len(p.oe) - 1; i >= 0; i-- {
256		if p.oe[i].DataAtom == a.Table {
257			table = p.oe[i]
258			break
259		}
260	}
261
262	var j int
263	for j = len(p.oe) - 1; j >= 0; j-- {
264		if p.oe[j].DataAtom == a.Template {
265			template = p.oe[j]
266			break
267		}
268	}
269
270	if template != nil && (table == nil || j > i) {
271		template.AppendChild(n)
272		return
273	}
274
275	if table == nil {
276		// The foster parent is the html element.
277		parent = p.oe[0]
278	} else {
279		parent = table.Parent
280	}
281	if parent == nil {
282		parent = p.oe[i-1]
283	}
284
285	if table != nil {
286		prev = table.PrevSibling
287	} else {
288		prev = parent.LastChild
289	}
290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291		prev.Data += n.Data
292		return
293	}
294
295	parent.InsertBefore(n, table)
296}
297
298// addText adds text to the preceding node if it is a text node, or else it
299// calls addChild with a new text node.
300func (p *parser) addText(text string) {
301	if text == "" {
302		return
303	}
304
305	if p.shouldFosterParent() {
306		p.fosterParent(&Node{
307			Type: TextNode,
308			Data: text,
309		})
310		return
311	}
312
313	t := p.top()
314	if n := t.LastChild; n != nil && n.Type == TextNode {
315		n.Data += text
316		return
317	}
318	p.addChild(&Node{
319		Type: TextNode,
320		Data: text,
321	})
322}
323
324// addElement adds a child element based on the current token.
325func (p *parser) addElement() {
326	p.addChild(&Node{
327		Type:     ElementNode,
328		DataAtom: p.tok.DataAtom,
329		Data:     p.tok.Data,
330		Attr:     p.tok.Attr,
331	})
332}
333
334// Section 12.2.4.3.
335func (p *parser) addFormattingElement() {
336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337	p.addElement()
338
339	// Implement the Noah's Ark clause, but with three per family instead of two.
340	identicalElements := 0
341findIdenticalElements:
342	for i := len(p.afe) - 1; i >= 0; i-- {
343		n := p.afe[i]
344		if n.Type == scopeMarkerNode {
345			break
346		}
347		if n.Type != ElementNode {
348			continue
349		}
350		if n.Namespace != "" {
351			continue
352		}
353		if n.DataAtom != tagAtom {
354			continue
355		}
356		if len(n.Attr) != len(attr) {
357			continue
358		}
359	compareAttributes:
360		for _, t0 := range n.Attr {
361			for _, t1 := range attr {
362				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363					// Found a match for this attribute, continue with the next attribute.
364					continue compareAttributes
365				}
366			}
367			// If we get here, there is no attribute that matches a.
368			// Therefore the element is not identical to the new one.
369			continue findIdenticalElements
370		}
371
372		identicalElements++
373		if identicalElements >= 3 {
374			p.afe.remove(n)
375		}
376	}
377
378	p.afe = append(p.afe, p.top())
379}
380
381// Section 12.2.4.3.
382func (p *parser) clearActiveFormattingElements() {
383	for {
384		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
385			return
386		}
387	}
388}
389
390// Section 12.2.4.3.
391func (p *parser) reconstructActiveFormattingElements() {
392	n := p.afe.top()
393	if n == nil {
394		return
395	}
396	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
397		return
398	}
399	i := len(p.afe) - 1
400	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
401		if i == 0 {
402			i = -1
403			break
404		}
405		i--
406		n = p.afe[i]
407	}
408	for {
409		i++
410		clone := p.afe[i].clone()
411		p.addChild(clone)
412		p.afe[i] = clone
413		if i == len(p.afe)-1 {
414			break
415		}
416	}
417}
418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the current start tag has been handled.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
423
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed; a false result means the token
// is left for the (possibly changed) current insertion mode to handle.
type insertionMode func(*parser) bool
429
430// setOriginalIM sets the insertion mode to return to after completing a text or
431// inTableText insertion mode.
432// Section 12.2.4.1, "using the rules for".
433func (p *parser) setOriginalIM() {
434	if p.originalIM != nil {
435		panic("html: bad parser state: originalIM was set twice")
436	}
437	p.originalIM = p.im
438}
439
// resetInsertionMode picks the insertion mode that matches the current stack
// of open elements. Section 12.2.4.1, "reset the insertion mode".
//
// It walks the stack from the top down and sets p.im from the first element
// with a recognised tag. The bottom-most stack entry is replaced by the
// fragment context element when one is set. If the stack is empty, p.im is
// left unchanged.
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last is true for the bottom-most element of the stack.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Look at the elements below the select: a template selects
				// inSelectIM and a table selects inSelectInTableIM,
				// whichever comes first.
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			// Namespaced template elements are skipped.
			if n.Namespace != "" {
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			// Whether a head element has been seen decides between the two modes.
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			// Unrecognised tag: fall back to inBodyIM at the bottom of the
			// stack, otherwise keep looking further down.
			if last {
				p.im = inBodyIM
				return
			}
			continue
		}
		return
	}
}
511
// whitespace is the cutset of characters that the insertion modes strip from
// the front of text tokens: space, tab, carriage return, line feed and form
// feed.
const whitespace = " \t\r\n\f"
513
514// Section 12.2.6.4.1.
515func initialIM(p *parser) bool {
516	switch p.tok.Type {
517	case TextToken:
518		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519		if len(p.tok.Data) == 0 {
520			// It was all whitespace, so ignore it.
521			return true
522		}
523	case CommentToken:
524		p.doc.AppendChild(&Node{
525			Type: CommentNode,
526			Data: p.tok.Data,
527		})
528		return true
529	case DoctypeToken:
530		n, quirks := parseDoctype(p.tok.Data)
531		p.doc.AppendChild(n)
532		p.quirks = quirks
533		p.im = beforeHTMLIM
534		return true
535	}
536	p.quirks = true
537	p.im = beforeHTMLIM
538	return false
539}
540
541// Section 12.2.6.4.2.
542func beforeHTMLIM(p *parser) bool {
543	switch p.tok.Type {
544	case DoctypeToken:
545		// Ignore the token.
546		return true
547	case TextToken:
548		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549		if len(p.tok.Data) == 0 {
550			// It was all whitespace, so ignore it.
551			return true
552		}
553	case StartTagToken:
554		if p.tok.DataAtom == a.Html {
555			p.addElement()
556			p.im = beforeHeadIM
557			return true
558		}
559	case EndTagToken:
560		switch p.tok.DataAtom {
561		case a.Head, a.Body, a.Html, a.Br:
562			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563			return false
564		default:
565			// Ignore the token.
566			return true
567		}
568	case CommentToken:
569		p.doc.AppendChild(&Node{
570			Type: CommentNode,
571			Data: p.tok.Data,
572		})
573		return true
574	}
575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576	return false
577}
578
579// Section 12.2.6.4.3.
580func beforeHeadIM(p *parser) bool {
581	switch p.tok.Type {
582	case TextToken:
583		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584		if len(p.tok.Data) == 0 {
585			// It was all whitespace, so ignore it.
586			return true
587		}
588	case StartTagToken:
589		switch p.tok.DataAtom {
590		case a.Head:
591			p.addElement()
592			p.head = p.top()
593			p.im = inHeadIM
594			return true
595		case a.Html:
596			return inBodyIM(p)
597		}
598	case EndTagToken:
599		switch p.tok.DataAtom {
600		case a.Head, a.Body, a.Html, a.Br:
601			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602			return false
603		default:
604			// Ignore the token.
605			return true
606		}
607	case CommentToken:
608		p.addChild(&Node{
609			Type: CommentNode,
610			Data: p.tok.Data,
611		})
612		return true
613	case DoctypeToken:
614		// Ignore the token.
615		return true
616	}
617
618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619	return false
620}
621
622// Section 12.2.6.4.4.
623func inHeadIM(p *parser) bool {
624	switch p.tok.Type {
625	case TextToken:
626		s := strings.TrimLeft(p.tok.Data, whitespace)
627		if len(s) < len(p.tok.Data) {
628			// Add the initial whitespace to the current node.
629			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
630			if s == "" {
631				return true
632			}
633			p.tok.Data = s
634		}
635	case StartTagToken:
636		switch p.tok.DataAtom {
637		case a.Html:
638			return inBodyIM(p)
639		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
640			p.addElement()
641			p.oe.pop()
642			p.acknowledgeSelfClosingTag()
643			return true
644		case a.Noscript:
645			if p.scripting {
646				p.parseGenericRawTextElement()
647				return true
648			}
649			p.addElement()
650			p.im = inHeadNoscriptIM
651			// Don't let the tokenizer go into raw text mode when scripting is disabled.
652			p.tokenizer.NextIsNotRawText()
653			return true
654		case a.Script, a.Title:
655			p.addElement()
656			p.setOriginalIM()
657			p.im = textIM
658			return true
659		case a.Noframes, a.Style:
660			p.parseGenericRawTextElement()
661			return true
662		case a.Head:
663			// Ignore the token.
664			return true
665		case a.Template:
666			p.addElement()
667			p.afe = append(p.afe, &scopeMarker)
668			p.framesetOK = false
669			p.im = inTemplateIM
670			p.templateStack = append(p.templateStack, inTemplateIM)
671			return true
672		}
673	case EndTagToken:
674		switch p.tok.DataAtom {
675		case a.Head:
676			p.oe.pop()
677			p.im = afterHeadIM
678			return true
679		case a.Body, a.Html, a.Br:
680			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
681			return false
682		case a.Template:
683			if !p.oe.contains(a.Template) {
684				return true
685			}
686			// TODO: remove this divergence from the HTML5 spec.
687			//
688			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
689			p.generateImpliedEndTags()
690			for i := len(p.oe) - 1; i >= 0; i-- {
691				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
692					p.oe = p.oe[:i]
693					break
694				}
695			}
696			p.clearActiveFormattingElements()
697			p.templateStack.pop()
698			p.resetInsertionMode()
699			return true
700		default:
701			// Ignore the token.
702			return true
703		}
704	case CommentToken:
705		p.addChild(&Node{
706			Type: CommentNode,
707			Data: p.tok.Data,
708		})
709		return true
710	case DoctypeToken:
711		// Ignore the token.
712		return true
713	}
714
715	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
716	return false
717}
718
719// 12.2.6.4.5.
720func inHeadNoscriptIM(p *parser) bool {
721	switch p.tok.Type {
722	case DoctypeToken:
723		// Ignore the token.
724		return true
725	case StartTagToken:
726		switch p.tok.DataAtom {
727		case a.Html:
728			return inBodyIM(p)
729		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
730			return inHeadIM(p)
731		case a.Head, a.Noscript:
732			// Ignore the token.
733			return true
734		}
735	case EndTagToken:
736		switch p.tok.DataAtom {
737		case a.Noscript, a.Br:
738		default:
739			// Ignore the token.
740			return true
741		}
742	case TextToken:
743		s := strings.TrimLeft(p.tok.Data, whitespace)
744		if len(s) == 0 {
745			// It was all whitespace.
746			return inHeadIM(p)
747		}
748	case CommentToken:
749		return inHeadIM(p)
750	}
751	p.oe.pop()
752	if p.top().DataAtom != a.Head {
753		panic("html: the new current node will be a head element.")
754	}
755	p.im = inHeadIM
756	if p.tok.DataAtom == a.Noscript {
757		return true
758	}
759	return false
760}
761
762// Section 12.2.6.4.6.
763func afterHeadIM(p *parser) bool {
764	switch p.tok.Type {
765	case TextToken:
766		s := strings.TrimLeft(p.tok.Data, whitespace)
767		if len(s) < len(p.tok.Data) {
768			// Add the initial whitespace to the current node.
769			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
770			if s == "" {
771				return true
772			}
773			p.tok.Data = s
774		}
775	case StartTagToken:
776		switch p.tok.DataAtom {
777		case a.Html:
778			return inBodyIM(p)
779		case a.Body:
780			p.addElement()
781			p.framesetOK = false
782			p.im = inBodyIM
783			return true
784		case a.Frameset:
785			p.addElement()
786			p.im = inFramesetIM
787			return true
788		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
789			p.oe = append(p.oe, p.head)
790			defer p.oe.remove(p.head)
791			return inHeadIM(p)
792		case a.Head:
793			// Ignore the token.
794			return true
795		}
796	case EndTagToken:
797		switch p.tok.DataAtom {
798		case a.Body, a.Html, a.Br:
799			// Drop down to creating an implied <body> tag.
800		case a.Template:
801			return inHeadIM(p)
802		default:
803			// Ignore the token.
804			return true
805		}
806	case CommentToken:
807		p.addChild(&Node{
808			Type: CommentNode,
809			Data: p.tok.Data,
810		})
811		return true
812	case DoctypeToken:
813		// Ignore the token.
814		return true
815	}
816
817	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
818	p.framesetOK = true
819	return false
820}
821
822// copyAttributes copies attributes of src not found on dst to dst.
823func copyAttributes(dst *Node, src Token) {
824	if len(src.Attr) == 0 {
825		return
826	}
827	attr := map[string]string{}
828	for _, t := range dst.Attr {
829		attr[t.Key] = t.Val
830	}
831	for _, t := range src.Attr {
832		if _, ok := attr[t.Key]; !ok {
833			dst.Attr = append(dst.Attr, t)
834			attr[t.Key] = t.Val
835		}
836	}
837}
838
839// Section 12.2.6.4.7.
840func inBodyIM(p *parser) bool {
841	switch p.tok.Type {
842	case TextToken:
843		d := p.tok.Data
844		switch n := p.oe.top(); n.DataAtom {
845		case a.Pre, a.Listing:
846			if n.FirstChild == nil {
847				// Ignore a newline at the start of a <pre> block.
848				if d != "" && d[0] == '\r' {
849					d = d[1:]
850				}
851				if d != "" && d[0] == '\n' {
852					d = d[1:]
853				}
854			}
855		}
856		d = strings.Replace(d, "\x00", "", -1)
857		if d == "" {
858			return true
859		}
860		p.reconstructActiveFormattingElements()
861		p.addText(d)
862		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
863			// There were non-whitespace characters inserted.
864			p.framesetOK = false
865		}
866	case StartTagToken:
867		switch p.tok.DataAtom {
868		case a.Html:
869			if p.oe.contains(a.Template) {
870				return true
871			}
872			copyAttributes(p.oe[0], p.tok)
873		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
874			return inHeadIM(p)
875		case a.Body:
876			if p.oe.contains(a.Template) {
877				return true
878			}
879			if len(p.oe) >= 2 {
880				body := p.oe[1]
881				if body.Type == ElementNode && body.DataAtom == a.Body {
882					p.framesetOK = false
883					copyAttributes(body, p.tok)
884				}
885			}
886		case a.Frameset:
887			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
888				// Ignore the token.
889				return true
890			}
891			body := p.oe[1]
892			if body.Parent != nil {
893				body.Parent.RemoveChild(body)
894			}
895			p.oe = p.oe[:1]
896			p.addElement()
897			p.im = inFramesetIM
898			return true
899		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
900			p.popUntil(buttonScope, a.P)
901			p.addElement()
902		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
903			p.popUntil(buttonScope, a.P)
904			switch n := p.top(); n.DataAtom {
905			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
906				p.oe.pop()
907			}
908			p.addElement()
909		case a.Pre, a.Listing:
910			p.popUntil(buttonScope, a.P)
911			p.addElement()
912			// The newline, if any, will be dealt with by the TextToken case.
913			p.framesetOK = false
914		case a.Form:
915			if p.form != nil && !p.oe.contains(a.Template) {
916				// Ignore the token
917				return true
918			}
919			p.popUntil(buttonScope, a.P)
920			p.addElement()
921			if !p.oe.contains(a.Template) {
922				p.form = p.top()
923			}
924		case a.Li:
925			p.framesetOK = false
926			for i := len(p.oe) - 1; i >= 0; i-- {
927				node := p.oe[i]
928				switch node.DataAtom {
929				case a.Li:
930					p.oe = p.oe[:i]
931				case a.Address, a.Div, a.P:
932					continue
933				default:
934					if !isSpecialElement(node) {
935						continue
936					}
937				}
938				break
939			}
940			p.popUntil(buttonScope, a.P)
941			p.addElement()
942		case a.Dd, a.Dt:
943			p.framesetOK = false
944			for i := len(p.oe) - 1; i >= 0; i-- {
945				node := p.oe[i]
946				switch node.DataAtom {
947				case a.Dd, a.Dt:
948					p.oe = p.oe[:i]
949				case a.Address, a.Div, a.P:
950					continue
951				default:
952					if !isSpecialElement(node) {
953						continue
954					}
955				}
956				break
957			}
958			p.popUntil(buttonScope, a.P)
959			p.addElement()
960		case a.Plaintext:
961			p.popUntil(buttonScope, a.P)
962			p.addElement()
963		case a.Button:
964			p.popUntil(defaultScope, a.Button)
965			p.reconstructActiveFormattingElements()
966			p.addElement()
967			p.framesetOK = false
968		case a.A:
969			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
970				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
971					p.inBodyEndTagFormatting(a.A, "a")
972					p.oe.remove(n)
973					p.afe.remove(n)
974					break
975				}
976			}
977			p.reconstructActiveFormattingElements()
978			p.addFormattingElement()
979		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
980			p.reconstructActiveFormattingElements()
981			p.addFormattingElement()
982		case a.Nobr:
983			p.reconstructActiveFormattingElements()
984			if p.elementInScope(defaultScope, a.Nobr) {
985				p.inBodyEndTagFormatting(a.Nobr, "nobr")
986				p.reconstructActiveFormattingElements()
987			}
988			p.addFormattingElement()
989		case a.Applet, a.Marquee, a.Object:
990			p.reconstructActiveFormattingElements()
991			p.addElement()
992			p.afe = append(p.afe, &scopeMarker)
993			p.framesetOK = false
994		case a.Table:
995			if !p.quirks {
996				p.popUntil(buttonScope, a.P)
997			}
998			p.addElement()
999			p.framesetOK = false
1000			p.im = inTableIM
1001			return true
1002		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1003			p.reconstructActiveFormattingElements()
1004			p.addElement()
1005			p.oe.pop()
1006			p.acknowledgeSelfClosingTag()
1007			if p.tok.DataAtom == a.Input {
1008				for _, t := range p.tok.Attr {
1009					if t.Key == "type" {
1010						if strings.ToLower(t.Val) == "hidden" {
1011							// Skip setting framesetOK = false
1012							return true
1013						}
1014					}
1015				}
1016			}
1017			p.framesetOK = false
1018		case a.Param, a.Source, a.Track:
1019			p.addElement()
1020			p.oe.pop()
1021			p.acknowledgeSelfClosingTag()
1022		case a.Hr:
1023			p.popUntil(buttonScope, a.P)
1024			p.addElement()
1025			p.oe.pop()
1026			p.acknowledgeSelfClosingTag()
1027			p.framesetOK = false
1028		case a.Image:
1029			p.tok.DataAtom = a.Img
1030			p.tok.Data = a.Img.String()
1031			return false
1032		case a.Textarea:
1033			p.addElement()
1034			p.setOriginalIM()
1035			p.framesetOK = false
1036			p.im = textIM
1037		case a.Xmp:
1038			p.popUntil(buttonScope, a.P)
1039			p.reconstructActiveFormattingElements()
1040			p.framesetOK = false
1041			p.parseGenericRawTextElement()
1042		case a.Iframe:
1043			p.framesetOK = false
1044			p.parseGenericRawTextElement()
1045		case a.Noembed:
1046			p.parseGenericRawTextElement()
1047		case a.Noscript:
1048			if p.scripting {
1049				p.parseGenericRawTextElement()
1050				return true
1051			}
1052			p.reconstructActiveFormattingElements()
1053			p.addElement()
1054			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1055			p.tokenizer.NextIsNotRawText()
1056		case a.Select:
1057			p.reconstructActiveFormattingElements()
1058			p.addElement()
1059			p.framesetOK = false
1060			p.im = inSelectIM
1061			return true
1062		case a.Optgroup, a.Option:
1063			if p.top().DataAtom == a.Option {
1064				p.oe.pop()
1065			}
1066			p.reconstructActiveFormattingElements()
1067			p.addElement()
1068		case a.Rb, a.Rtc:
1069			if p.elementInScope(defaultScope, a.Ruby) {
1070				p.generateImpliedEndTags()
1071			}
1072			p.addElement()
1073		case a.Rp, a.Rt:
1074			if p.elementInScope(defaultScope, a.Ruby) {
1075				p.generateImpliedEndTags("rtc")
1076			}
1077			p.addElement()
1078		case a.Math, a.Svg:
1079			p.reconstructActiveFormattingElements()
1080			if p.tok.DataAtom == a.Math {
1081				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1082			} else {
1083				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1084			}
1085			adjustForeignAttributes(p.tok.Attr)
1086			p.addElement()
1087			p.top().Namespace = p.tok.Data
1088			if p.hasSelfClosingToken {
1089				p.oe.pop()
1090				p.acknowledgeSelfClosingTag()
1091			}
1092			return true
1093		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1094			// Ignore the token.
1095		default:
1096			p.reconstructActiveFormattingElements()
1097			p.addElement()
1098		}
1099	case EndTagToken:
1100		switch p.tok.DataAtom {
1101		case a.Body:
1102			if p.elementInScope(defaultScope, a.Body) {
1103				p.im = afterBodyIM
1104			}
1105		case a.Html:
1106			if p.elementInScope(defaultScope, a.Body) {
1107				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1108				return false
1109			}
1110			return true
1111		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1112			p.popUntil(defaultScope, p.tok.DataAtom)
1113		case a.Form:
1114			if p.oe.contains(a.Template) {
1115				i := p.indexOfElementInScope(defaultScope, a.Form)
1116				if i == -1 {
1117					// Ignore the token.
1118					return true
1119				}
1120				p.generateImpliedEndTags()
1121				if p.oe[i].DataAtom != a.Form {
1122					// Ignore the token.
1123					return true
1124				}
1125				p.popUntil(defaultScope, a.Form)
1126			} else {
1127				node := p.form
1128				p.form = nil
1129				i := p.indexOfElementInScope(defaultScope, a.Form)
1130				if node == nil || i == -1 || p.oe[i] != node {
1131					// Ignore the token.
1132					return true
1133				}
1134				p.generateImpliedEndTags()
1135				p.oe.remove(node)
1136			}
1137		case a.P:
1138			if !p.elementInScope(buttonScope, a.P) {
1139				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1140			}
1141			p.popUntil(buttonScope, a.P)
1142		case a.Li:
1143			p.popUntil(listItemScope, a.Li)
1144		case a.Dd, a.Dt:
1145			p.popUntil(defaultScope, p.tok.DataAtom)
1146		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1147			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1148		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1149			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1150		case a.Applet, a.Marquee, a.Object:
1151			if p.popUntil(defaultScope, p.tok.DataAtom) {
1152				p.clearActiveFormattingElements()
1153			}
1154		case a.Br:
1155			p.tok.Type = StartTagToken
1156			return false
1157		case a.Template:
1158			return inHeadIM(p)
1159		default:
1160			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1161		}
1162	case CommentToken:
1163		p.addChild(&Node{
1164			Type: CommentNode,
1165			Data: p.tok.Data,
1166		})
1167	case ErrorToken:
1168		// TODO: remove this divergence from the HTML5 spec.
1169		if len(p.templateStack) > 0 {
1170			p.im = inTemplateIM
1171			return false
1172		}
1173		for _, e := range p.oe {
1174			switch e.DataAtom {
1175			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1176				a.Thead, a.Tr, a.Body, a.Html:
1177			default:
1178				return true
1179			}
1180		}
1181	}
1182
1183	return true
1184}
1185
// inBodyEndTagFormatting handles an end tag for one of the formatting
// elements that inBodyIM dispatches here (a, b, big, code, em, font, i, nobr,
// s, small, strike, strong, tt, u).
//
// tagAtom and tagName are the atom and the name of the end tag being
// processed. The method rewrites the stack of open elements (p.oe), the list
// of active formatting elements (p.afe) and the node tree in place; it has no
// return value.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the requested tag name and is
	// not in the list of active formatting elements, so it is simply popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop. It runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry in the list of
		// active formatting elements with the requested atom, searching back
		// no further than the most recent scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: treat this as "any other end tag".
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		// The stale entry is also dropped from the active formatting list.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element at or
		// above formattingElement's position in the stack of open elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop everything up to and including the
			// formatting element, then drop it from the active formatting list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// NOTE(review): this indexes p.oe[feIndex-1], so it assumes the
		// formatting element is never the bottom entry of the stack — confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step 14.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone in both the active formatting
			// list and the stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first replacement, move the bookmark to just
			// after the new node in the active formatting list.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Detach lastNode from its current parent, if any, and
			// append it to node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes a slot
		// directly after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1338
1339// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1340// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1341// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1342func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1343	for i := len(p.oe) - 1; i >= 0; i-- {
1344		// Two element nodes have the same tag if they have the same Data (a
1345		// string-typed field). As an optimization, for common HTML tags, each
1346		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1347		// field), since integer comparison is faster than string comparison.
1348		// Uncommon (custom) tags get a zero DataAtom.
1349		//
1350		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1351		if (p.oe[i].DataAtom == tagAtom) &&
1352			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1353			p.oe = p.oe[:i]
1354			break
1355		}
1356		if isSpecialElement(p.oe[i]) {
1357			break
1358		}
1359	}
1360}
1361
1362// Section 12.2.6.4.8.
1363func textIM(p *parser) bool {
1364	switch p.tok.Type {
1365	case ErrorToken:
1366		p.oe.pop()
1367	case TextToken:
1368		d := p.tok.Data
1369		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1370			// Ignore a newline at the start of a <textarea> block.
1371			if d != "" && d[0] == '\r' {
1372				d = d[1:]
1373			}
1374			if d != "" && d[0] == '\n' {
1375				d = d[1:]
1376			}
1377		}
1378		if d == "" {
1379			return true
1380		}
1381		p.addText(d)
1382		return true
1383	case EndTagToken:
1384		p.oe.pop()
1385	}
1386	p.im = p.originalIM
1387	p.originalIM = nil
1388	return p.tok.Type == EndTagToken
1389}
1390
// inTableIM handles tokens in the "in table" insertion mode
// (section 12.2.6.4.9).
//
// It reports whether the current token was consumed; a false result makes
// the caller reprocess the token under the current insertion mode. Every
// case that does not return explicitly falls through to the end of the
// function, where the token is handed to inBodyIM with foster parenting
// switched on.
func inTableIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// NUL characters are dropped from the token before anything else.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
		switch p.oe.top().DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			// Whitespace-only text is added directly under a table-structure
			// element. Anything else falls through to the tail below.
			if strings.Trim(p.tok.Data, whitespace) == "" {
				p.addText(p.tok.Data)
				return true
			}
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Caption:
			p.clearStackToContext(tableScope)
			// A scope marker is pushed onto the active formatting elements
			// before the caption element is added.
			p.afe = append(p.afe, &scopeMarker)
			p.addElement()
			p.im = inCaptionIM
			return true
		case a.Colgroup:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inColumnGroupIM
			return true
		case a.Col:
			// A <col> implies a <colgroup>; the <col> token is then reprocessed.
			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
			return false
		case a.Tbody, a.Tfoot, a.Thead:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inTableBodyIM
			return true
		case a.Td, a.Th, a.Tr:
			// A row or cell implies a <tbody>; the token is then reprocessed.
			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
			return false
		case a.Table:
			// A nested <table> start tag closes the open table, if one is in
			// table scope, and is then reprocessed.
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return false
			}
			// Ignore the token.
			return true
		case a.Style, a.Script, a.Template:
			return inHeadIM(p)
		case a.Input:
			// Only <input type=hidden> (value compared case-insensitively) is
			// inserted in place; it is popped again right after being added.
			for _, t := range p.tok.Attr {
				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
					p.addElement()
					p.oe.pop()
					return true
				}
			}
			// Otherwise drop down to the default action.
		case a.Form:
			if p.oe.contains(a.Template) || p.form != nil {
				// Ignore the token.
				return true
			}
			// The form element is added, popped again, and recorded as the
			// form element pointer.
			p.addElement()
			p.form = p.oe.pop()
			// NOTE(review): there is no return here, so control continues to
			// the foster-parenting tail and inBodyIM below — confirm that
			// this is intended.
		case a.Select:
			p.reconstructActiveFormattingElements()
			// Foster parenting is enabled only while the <select> is added,
			// and only when the current node is a table-structure element.
			switch p.top().DataAtom {
			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
				p.fosterParenting = true
			}
			p.addElement()
			p.fosterParenting = false
			p.framesetOK = false
			p.im = inSelectInTableIM
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Table:
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return true
			}
			// Ignore the token.
			return true
		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
			return true
		case a.Template:
			return inHeadIM(p)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	case ErrorToken:
		return inBodyIM(p)
	}

	// Anything else: process the token with the inBodyIM rules while foster
	// parenting is enabled. The deferred function clears the flag again when
	// this function returns.
	p.fosterParenting = true
	defer func() { p.fosterParenting = false }()

	return inBodyIM(p)
}
1497
1498// Section 12.2.6.4.11.
1499func inCaptionIM(p *parser) bool {
1500	switch p.tok.Type {
1501	case StartTagToken:
1502		switch p.tok.DataAtom {
1503		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1504			if !p.popUntil(tableScope, a.Caption) {
1505				// Ignore the token.
1506				return true
1507			}
1508			p.clearActiveFormattingElements()
1509			p.im = inTableIM
1510			return false
1511		case a.Select:
1512			p.reconstructActiveFormattingElements()
1513			p.addElement()
1514			p.framesetOK = false
1515			p.im = inSelectInTableIM
1516			return true
1517		}
1518	case EndTagToken:
1519		switch p.tok.DataAtom {
1520		case a.Caption:
1521			if p.popUntil(tableScope, a.Caption) {
1522				p.clearActiveFormattingElements()
1523				p.im = inTableIM
1524			}
1525			return true
1526		case a.Table:
1527			if !p.popUntil(tableScope, a.Caption) {
1528				// Ignore the token.
1529				return true
1530			}
1531			p.clearActiveFormattingElements()
1532			p.im = inTableIM
1533			return false
1534		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1535			// Ignore the token.
1536			return true
1537		}
1538	}
1539	return inBodyIM(p)
1540}
1541
1542// Section 12.2.6.4.12.
1543func inColumnGroupIM(p *parser) bool {
1544	switch p.tok.Type {
1545	case TextToken:
1546		s := strings.TrimLeft(p.tok.Data, whitespace)
1547		if len(s) < len(p.tok.Data) {
1548			// Add the initial whitespace to the current node.
1549			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1550			if s == "" {
1551				return true
1552			}
1553			p.tok.Data = s
1554		}
1555	case CommentToken:
1556		p.addChild(&Node{
1557			Type: CommentNode,
1558			Data: p.tok.Data,
1559		})
1560		return true
1561	case DoctypeToken:
1562		// Ignore the token.
1563		return true
1564	case StartTagToken:
1565		switch p.tok.DataAtom {
1566		case a.Html:
1567			return inBodyIM(p)
1568		case a.Col:
1569			p.addElement()
1570			p.oe.pop()
1571			p.acknowledgeSelfClosingTag()
1572			return true
1573		case a.Template:
1574			return inHeadIM(p)
1575		}
1576	case EndTagToken:
1577		switch p.tok.DataAtom {
1578		case a.Colgroup:
1579			if p.oe.top().DataAtom == a.Colgroup {
1580				p.oe.pop()
1581				p.im = inTableIM
1582			}
1583			return true
1584		case a.Col:
1585			// Ignore the token.
1586			return true
1587		case a.Template:
1588			return inHeadIM(p)
1589		}
1590	case ErrorToken:
1591		return inBodyIM(p)
1592	}
1593	if p.oe.top().DataAtom != a.Colgroup {
1594		return true
1595	}
1596	p.oe.pop()
1597	p.im = inTableIM
1598	return false
1599}
1600
1601// Section 12.2.6.4.13.
1602func inTableBodyIM(p *parser) bool {
1603	switch p.tok.Type {
1604	case StartTagToken:
1605		switch p.tok.DataAtom {
1606		case a.Tr:
1607			p.clearStackToContext(tableBodyScope)
1608			p.addElement()
1609			p.im = inRowIM
1610			return true
1611		case a.Td, a.Th:
1612			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1613			return false
1614		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1615			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1616				p.im = inTableIM
1617				return false
1618			}
1619			// Ignore the token.
1620			return true
1621		}
1622	case EndTagToken:
1623		switch p.tok.DataAtom {
1624		case a.Tbody, a.Tfoot, a.Thead:
1625			if p.elementInScope(tableScope, p.tok.DataAtom) {
1626				p.clearStackToContext(tableBodyScope)
1627				p.oe.pop()
1628				p.im = inTableIM
1629			}
1630			return true
1631		case a.Table:
1632			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1633				p.im = inTableIM
1634				return false
1635			}
1636			// Ignore the token.
1637			return true
1638		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1639			// Ignore the token.
1640			return true
1641		}
1642	case CommentToken:
1643		p.addChild(&Node{
1644			Type: CommentNode,
1645			Data: p.tok.Data,
1646		})
1647		return true
1648	}
1649
1650	return inTableIM(p)
1651}
1652
1653// Section 12.2.6.4.14.
1654func inRowIM(p *parser) bool {
1655	switch p.tok.Type {
1656	case StartTagToken:
1657		switch p.tok.DataAtom {
1658		case a.Td, a.Th:
1659			p.clearStackToContext(tableRowScope)
1660			p.addElement()
1661			p.afe = append(p.afe, &scopeMarker)
1662			p.im = inCellIM
1663			return true
1664		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1665			if p.popUntil(tableScope, a.Tr) {
1666				p.im = inTableBodyIM
1667				return false
1668			}
1669			// Ignore the token.
1670			return true
1671		}
1672	case EndTagToken:
1673		switch p.tok.DataAtom {
1674		case a.Tr:
1675			if p.popUntil(tableScope, a.Tr) {
1676				p.im = inTableBodyIM
1677				return true
1678			}
1679			// Ignore the token.
1680			return true
1681		case a.Table:
1682			if p.popUntil(tableScope, a.Tr) {
1683				p.im = inTableBodyIM
1684				return false
1685			}
1686			// Ignore the token.
1687			return true
1688		case a.Tbody, a.Tfoot, a.Thead:
1689			if p.elementInScope(tableScope, p.tok.DataAtom) {
1690				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1691				return false
1692			}
1693			// Ignore the token.
1694			return true
1695		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1696			// Ignore the token.
1697			return true
1698		}
1699	}
1700
1701	return inTableIM(p)
1702}
1703
1704// Section 12.2.6.4.15.
1705func inCellIM(p *parser) bool {
1706	switch p.tok.Type {
1707	case StartTagToken:
1708		switch p.tok.DataAtom {
1709		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1710			if p.popUntil(tableScope, a.Td, a.Th) {
1711				// Close the cell and reprocess.
1712				p.clearActiveFormattingElements()
1713				p.im = inRowIM
1714				return false
1715			}
1716			// Ignore the token.
1717			return true
1718		case a.Select:
1719			p.reconstructActiveFormattingElements()
1720			p.addElement()
1721			p.framesetOK = false
1722			p.im = inSelectInTableIM
1723			return true
1724		}
1725	case EndTagToken:
1726		switch p.tok.DataAtom {
1727		case a.Td, a.Th:
1728			if !p.popUntil(tableScope, p.tok.DataAtom) {
1729				// Ignore the token.
1730				return true
1731			}
1732			p.clearActiveFormattingElements()
1733			p.im = inRowIM
1734			return true
1735		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1736			// Ignore the token.
1737			return true
1738		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1739			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1740				// Ignore the token.
1741				return true
1742			}
1743			// Close the cell and reprocess.
1744			if p.popUntil(tableScope, a.Td, a.Th) {
1745				p.clearActiveFormattingElements()
1746			}
1747			p.im = inRowIM
1748			return false
1749		}
1750	}
1751	return inBodyIM(p)
1752}
1753
1754// Section 12.2.6.4.16.
1755func inSelectIM(p *parser) bool {
1756	switch p.tok.Type {
1757	case TextToken:
1758		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1759	case StartTagToken:
1760		switch p.tok.DataAtom {
1761		case a.Html:
1762			return inBodyIM(p)
1763		case a.Option:
1764			if p.top().DataAtom == a.Option {
1765				p.oe.pop()
1766			}
1767			p.addElement()
1768		case a.Optgroup:
1769			if p.top().DataAtom == a.Option {
1770				p.oe.pop()
1771			}
1772			if p.top().DataAtom == a.Optgroup {
1773				p.oe.pop()
1774			}
1775			p.addElement()
1776		case a.Select:
1777			if !p.popUntil(selectScope, a.Select) {
1778				// Ignore the token.
1779				return true
1780			}
1781			p.resetInsertionMode()
1782		case a.Input, a.Keygen, a.Textarea:
1783			if p.elementInScope(selectScope, a.Select) {
1784				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1785				return false
1786			}
1787			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1788			p.tokenizer.NextIsNotRawText()
1789			// Ignore the token.
1790			return true
1791		case a.Script, a.Template:
1792			return inHeadIM(p)
1793		}
1794	case EndTagToken:
1795		switch p.tok.DataAtom {
1796		case a.Option:
1797			if p.top().DataAtom == a.Option {
1798				p.oe.pop()
1799			}
1800		case a.Optgroup:
1801			i := len(p.oe) - 1
1802			if p.oe[i].DataAtom == a.Option {
1803				i--
1804			}
1805			if p.oe[i].DataAtom == a.Optgroup {
1806				p.oe = p.oe[:i]
1807			}
1808		case a.Select:
1809			if !p.popUntil(selectScope, a.Select) {
1810				// Ignore the token.
1811				return true
1812			}
1813			p.resetInsertionMode()
1814		case a.Template:
1815			return inHeadIM(p)
1816		}
1817	case CommentToken:
1818		p.addChild(&Node{
1819			Type: CommentNode,
1820			Data: p.tok.Data,
1821		})
1822	case DoctypeToken:
1823		// Ignore the token.
1824		return true
1825	case ErrorToken:
1826		return inBodyIM(p)
1827	}
1828
1829	return true
1830}
1831
1832// Section 12.2.6.4.17.
1833func inSelectInTableIM(p *parser) bool {
1834	switch p.tok.Type {
1835	case StartTagToken, EndTagToken:
1836		switch p.tok.DataAtom {
1837		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1838			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1839				// Ignore the token.
1840				return true
1841			}
1842			// This is like p.popUntil(selectScope, a.Select), but it also
1843			// matches <math select>, not just <select>. Matching the MathML
1844			// tag is arguably incorrect (conceptually), but it mimics what
1845			// Chromium does.
1846			for i := len(p.oe) - 1; i >= 0; i-- {
1847				if n := p.oe[i]; n.DataAtom == a.Select {
1848					p.oe = p.oe[:i]
1849					break
1850				}
1851			}
1852			p.resetInsertionMode()
1853			return false
1854		}
1855	}
1856	return inSelectIM(p)
1857}
1858
1859// Section 12.2.6.4.18.
1860func inTemplateIM(p *parser) bool {
1861	switch p.tok.Type {
1862	case TextToken, CommentToken, DoctypeToken:
1863		return inBodyIM(p)
1864	case StartTagToken:
1865		switch p.tok.DataAtom {
1866		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1867			return inHeadIM(p)
1868		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1869			p.templateStack.pop()
1870			p.templateStack = append(p.templateStack, inTableIM)
1871			p.im = inTableIM
1872			return false
1873		case a.Col:
1874			p.templateStack.pop()
1875			p.templateStack = append(p.templateStack, inColumnGroupIM)
1876			p.im = inColumnGroupIM
1877			return false
1878		case a.Tr:
1879			p.templateStack.pop()
1880			p.templateStack = append(p.templateStack, inTableBodyIM)
1881			p.im = inTableBodyIM
1882			return false
1883		case a.Td, a.Th:
1884			p.templateStack.pop()
1885			p.templateStack = append(p.templateStack, inRowIM)
1886			p.im = inRowIM
1887			return false
1888		default:
1889			p.templateStack.pop()
1890			p.templateStack = append(p.templateStack, inBodyIM)
1891			p.im = inBodyIM
1892			return false
1893		}
1894	case EndTagToken:
1895		switch p.tok.DataAtom {
1896		case a.Template:
1897			return inHeadIM(p)
1898		default:
1899			// Ignore the token.
1900			return true
1901		}
1902	case ErrorToken:
1903		if !p.oe.contains(a.Template) {
1904			// Ignore the token.
1905			return true
1906		}
1907		// TODO: remove this divergence from the HTML5 spec.
1908		//
1909		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1910		p.generateImpliedEndTags()
1911		for i := len(p.oe) - 1; i >= 0; i-- {
1912			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1913				p.oe = p.oe[:i]
1914				break
1915			}
1916		}
1917		p.clearActiveFormattingElements()
1918		p.templateStack.pop()
1919		p.resetInsertionMode()
1920		return false
1921	}
1922	return false
1923}
1924
1925// Section 12.2.6.4.19.
1926func afterBodyIM(p *parser) bool {
1927	switch p.tok.Type {
1928	case ErrorToken:
1929		// Stop parsing.
1930		return true
1931	case TextToken:
1932		s := strings.TrimLeft(p.tok.Data, whitespace)
1933		if len(s) == 0 {
1934			// It was all whitespace.
1935			return inBodyIM(p)
1936		}
1937	case StartTagToken:
1938		if p.tok.DataAtom == a.Html {
1939			return inBodyIM(p)
1940		}
1941	case EndTagToken:
1942		if p.tok.DataAtom == a.Html {
1943			if !p.fragment {
1944				p.im = afterAfterBodyIM
1945			}
1946			return true
1947		}
1948	case CommentToken:
1949		// The comment is attached to the <html> element.
1950		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1951			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1952		}
1953		p.oe[0].AppendChild(&Node{
1954			Type: CommentNode,
1955			Data: p.tok.Data,
1956		})
1957		return true
1958	}
1959	p.im = inBodyIM
1960	return false
1961}
1962
1963// Section 12.2.6.4.20.
1964func inFramesetIM(p *parser) bool {
1965	switch p.tok.Type {
1966	case CommentToken:
1967		p.addChild(&Node{
1968			Type: CommentNode,
1969			Data: p.tok.Data,
1970		})
1971	case TextToken:
1972		// Ignore all text but whitespace.
1973		s := strings.Map(func(c rune) rune {
1974			switch c {
1975			case ' ', '\t', '\n', '\f', '\r':
1976				return c
1977			}
1978			return -1
1979		}, p.tok.Data)
1980		if s != "" {
1981			p.addText(s)
1982		}
1983	case StartTagToken:
1984		switch p.tok.DataAtom {
1985		case a.Html:
1986			return inBodyIM(p)
1987		case a.Frameset:
1988			p.addElement()
1989		case a.Frame:
1990			p.addElement()
1991			p.oe.pop()
1992			p.acknowledgeSelfClosingTag()
1993		case a.Noframes:
1994			return inHeadIM(p)
1995		}
1996	case EndTagToken:
1997		switch p.tok.DataAtom {
1998		case a.Frameset:
1999			if p.oe.top().DataAtom != a.Html {
2000				p.oe.pop()
2001				if p.oe.top().DataAtom != a.Frameset {
2002					p.im = afterFramesetIM
2003					return true
2004				}
2005			}
2006		}
2007	default:
2008		// Ignore the token.
2009	}
2010	return true
2011}
2012
2013// Section 12.2.6.4.21.
2014func afterFramesetIM(p *parser) bool {
2015	switch p.tok.Type {
2016	case CommentToken:
2017		p.addChild(&Node{
2018			Type: CommentNode,
2019			Data: p.tok.Data,
2020		})
2021	case TextToken:
2022		// Ignore all text but whitespace.
2023		s := strings.Map(func(c rune) rune {
2024			switch c {
2025			case ' ', '\t', '\n', '\f', '\r':
2026				return c
2027			}
2028			return -1
2029		}, p.tok.Data)
2030		if s != "" {
2031			p.addText(s)
2032		}
2033	case StartTagToken:
2034		switch p.tok.DataAtom {
2035		case a.Html:
2036			return inBodyIM(p)
2037		case a.Noframes:
2038			return inHeadIM(p)
2039		}
2040	case EndTagToken:
2041		switch p.tok.DataAtom {
2042		case a.Html:
2043			p.im = afterAfterFramesetIM
2044			return true
2045		}
2046	default:
2047		// Ignore the token.
2048	}
2049	return true
2050}
2051
2052// Section 12.2.6.4.22.
2053func afterAfterBodyIM(p *parser) bool {
2054	switch p.tok.Type {
2055	case ErrorToken:
2056		// Stop parsing.
2057		return true
2058	case TextToken:
2059		s := strings.TrimLeft(p.tok.Data, whitespace)
2060		if len(s) == 0 {
2061			// It was all whitespace.
2062			return inBodyIM(p)
2063		}
2064	case StartTagToken:
2065		if p.tok.DataAtom == a.Html {
2066			return inBodyIM(p)
2067		}
2068	case CommentToken:
2069		p.doc.AppendChild(&Node{
2070			Type: CommentNode,
2071			Data: p.tok.Data,
2072		})
2073		return true
2074	case DoctypeToken:
2075		return inBodyIM(p)
2076	}
2077	p.im = inBodyIM
2078	return false
2079}
2080
2081// Section 12.2.6.4.23.
2082func afterAfterFramesetIM(p *parser) bool {
2083	switch p.tok.Type {
2084	case CommentToken:
2085		p.doc.AppendChild(&Node{
2086			Type: CommentNode,
2087			Data: p.tok.Data,
2088		})
2089	case TextToken:
2090		// Ignore all text but whitespace.
2091		s := strings.Map(func(c rune) rune {
2092			switch c {
2093			case ' ', '\t', '\n', '\f', '\r':
2094				return c
2095			}
2096			return -1
2097		}, p.tok.Data)
2098		if s != "" {
2099			p.tok.Data = s
2100			return inBodyIM(p)
2101		}
2102	case StartTagToken:
2103		switch p.tok.DataAtom {
2104		case a.Html:
2105			return inBodyIM(p)
2106		case a.Noframes:
2107			return inHeadIM(p)
2108		}
2109	case DoctypeToken:
2110		return inBodyIM(p)
2111	default:
2112		// Ignore the token.
2113	}
2114	return true
2115}
2116
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent trims it from text tokens to decide whether framesetOK
// may stay set.
const whitespaceOrNUL = whitespace + "\x00"
2118
// parseForeignContent handles the current token according to the rules for
// parsing tokens in foreign content (section 12.2.6.5). parseCurrentToken
// calls it instead of the current insertion mode whenever inForeignContent
// reports true.
//
// It reports whether the token was consumed; a false result makes the caller
// reprocess the token.
func parseForeignContent(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// framesetOK survives only if the text consists entirely of
		// whitespace and NUL characters.
		if p.framesetOK {
			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
		}
		// NUL characters are replaced with U+FFFD before the text is added.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
		p.addText(p.tok.Data)
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		if !p.fragment {
			// b records whether this start tag breaks out of foreign content:
			// either its name is in the breakout table, or it is a <font>
			// tag with a color, face or size attribute.
			b := breakout[p.tok.Data]
			if p.tok.DataAtom == a.Font {
			loop:
				for _, attr := range p.tok.Attr {
					switch attr.Key {
					case "color", "face", "size":
						b = true
						break loop
					}
				}
			}
			if b {
				// Truncate the stack of open elements so that its top is the
				// nearest HTML-namespace element or integration point, then
				// have the token reprocessed.
				for i := len(p.oe) - 1; i >= 0; i-- {
					n := p.oe[i]
					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
						p.oe = p.oe[:i+1]
						break
					}
				}
				return false
			}
		}
		current := p.adjustedCurrentNode()
		switch current.Namespace {
		case "math":
			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
		case "svg":
			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
			// SVG wants e.g. "foreignObject" with a capital second "O".
			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
				p.tok.DataAtom = a.Lookup([]byte(x))
				p.tok.Data = x
			}
			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
		default:
			panic("html: bad parser state: unexpected namespace")
		}
		adjustForeignAttributes(p.tok.Attr)
		// The new element takes the namespace of the adjusted current node.
		namespace := current.Namespace
		p.addElement()
		p.top().Namespace = namespace
		if namespace != "" {
			// Don't let the tokenizer go into raw text mode in foreign content
			// (e.g. in an SVG <title> tag).
			p.tokenizer.NextIsNotRawText()
		}
		if p.hasSelfClosingToken {
			// A self-closing foreign element is popped again immediately.
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		}
	case EndTagToken:
		// Walk down the stack of open elements. Reaching an HTML-namespace
		// element first hands the token to the current insertion mode. A
		// case-insensitive name match pops that element and everything
		// above it.
		for i := len(p.oe) - 1; i >= 0; i-- {
			if p.oe[i].Namespace == "" {
				return p.im(p)
			}
			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
				p.oe = p.oe[:i]
				break
			}
		}
		return true
	default:
		// Ignore the token.
	}
	return true
}
2201
2202// Section 12.2.4.2.
2203func (p *parser) adjustedCurrentNode() *Node {
2204	if len(p.oe) == 1 && p.fragment && p.context != nil {
2205		return p.context
2206	}
2207	return p.oe.top()
2208}
2209
2210// Section 12.2.6.
2211func (p *parser) inForeignContent() bool {
2212	if len(p.oe) == 0 {
2213		return false
2214	}
2215	n := p.adjustedCurrentNode()
2216	if n.Namespace == "" {
2217		return false
2218	}
2219	if mathMLTextIntegrationPoint(n) {
2220		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2221			return false
2222		}
2223		if p.tok.Type == TextToken {
2224			return false
2225		}
2226	}
2227	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2228		return false
2229	}
2230	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2231		return false
2232	}
2233	if p.tok.Type == ErrorToken {
2234		return false
2235	}
2236	return true
2237}
2238
2239// parseImpliedToken parses a token as though it had appeared in the parser's
2240// input.
2241func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2242	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2243	p.tok = Token{
2244		Type:     t,
2245		DataAtom: dataAtom,
2246		Data:     data,
2247	}
2248	p.hasSelfClosingToken = false
2249	p.parseCurrentToken()
2250	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2251}
2252
2253// parseCurrentToken runs the current token through the parsing routines
2254// until it is consumed.
2255func (p *parser) parseCurrentToken() {
2256	if p.tok.Type == SelfClosingTagToken {
2257		p.hasSelfClosingToken = true
2258		p.tok.Type = StartTagToken
2259	}
2260
2261	consumed := false
2262	for !consumed {
2263		if p.inForeignContent() {
2264			consumed = parseForeignContent(p)
2265		} else {
2266			consumed = p.im(p)
2267		}
2268	}
2269
2270	if p.hasSelfClosingToken {
2271		// This is a parse error, but ignore it.
2272		p.hasSelfClosingToken = false
2273	}
2274}
2275
2276func (p *parser) parse() error {
2277	// Iterate until EOF. Any other error will cause an early return.
2278	var err error
2279	for err != io.EOF {
2280		// CDATA sections are allowed only in foreign content.
2281		n := p.oe.top()
2282		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2283		// Read and parse the next token.
2284		p.tokenizer.Next()
2285		p.tok = p.tokenizer.Token()
2286		if p.tok.Type == ErrorToken {
2287			err = p.tokenizer.Err()
2288			if err != nil && err != io.EOF {
2289				return err
2290			}
2291		}
2292		p.parseCurrentToken()
2293	}
2294	return nil
2295}
2296
2297// Parse returns the parse tree for the HTML from the given Reader.
2298//
2299// It implements the HTML5 parsing algorithm
2300// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2301// which is very complicated. The resultant tree can contain implicitly created
2302// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2303// differ from the nesting implied by a naive processing of start and end
2304// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2305// with no corresponding node in the resulting tree.
2306//
2307// The input is assumed to be UTF-8 encoded.
2308func Parse(r io.Reader) (*Node, error) {
2309	return ParseWithOptions(r)
2310}
2311
// ParseFragment parses a fragment of HTML and returns the nodes that were
// found. If the fragment is the InnerHTML for an existing element, pass that
// element in context.
//
// A non-nil context must be an ElementNode whose DataAtom is consistent with
// its Data; otherwise an error is returned. A nil context is allowed.
//
// It has the same intricacies as Parse, and is equivalent to calling
// ParseFragmentWithOptions with no options.
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
	return ParseFragmentWithOptions(r, context)
}
2320
// ParseOption configures a parser.
//
// Options passed to ParseWithOptions or ParseFragmentWithOptions are applied
// in argument order, after the parser's defaults have been set and before any
// input is parsed.
type ParseOption func(p *parser)
2323
2324// ParseOptionEnableScripting configures the scripting flag.
2325// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2326//
2327// By default, scripting is enabled.
2328func ParseOptionEnableScripting(enable bool) ParseOption {
2329	return func(p *parser) {
2330		p.scripting = enable
2331	}
2332}
2333
2334// ParseWithOptions is like Parse, with options.
2335func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2336	p := &parser{
2337		tokenizer: NewTokenizer(r),
2338		doc: &Node{
2339			Type: DocumentNode,
2340		},
2341		scripting:  true,
2342		framesetOK: true,
2343		im:         initialIM,
2344	}
2345
2346	for _, f := range opts {
2347		f(p)
2348	}
2349
2350	if err := p.parse(); err != nil {
2351		return nil, err
2352	}
2353	return p.doc, nil
2354}
2355
2356// ParseFragmentWithOptions is like ParseFragment, with options.
2357func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2358	contextTag := ""
2359	if context != nil {
2360		if context.Type != ElementNode {
2361			return nil, errors.New("html: ParseFragment of non-element Node")
2362		}
2363		// The next check isn't just context.DataAtom.String() == context.Data because
2364		// it is valid to pass an element whose tag isn't a known atom. For example,
2365		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2366		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2367			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2368		}
2369		contextTag = context.DataAtom.String()
2370	}
2371	p := &parser{
2372		doc: &Node{
2373			Type: DocumentNode,
2374		},
2375		scripting: true,
2376		fragment:  true,
2377		context:   context,
2378	}
2379	if context != nil && context.Namespace != "" {
2380		p.tokenizer = NewTokenizer(r)
2381	} else {
2382		p.tokenizer = NewTokenizerFragment(r, contextTag)
2383	}
2384
2385	for _, f := range opts {
2386		f(p)
2387	}
2388
2389	root := &Node{
2390		Type:     ElementNode,
2391		DataAtom: a.Html,
2392		Data:     a.Html.String(),
2393	}
2394	p.doc.AppendChild(root)
2395	p.oe = nodeStack{root}
2396	if context != nil && context.DataAtom == a.Template {
2397		p.templateStack = append(p.templateStack, inTemplateIM)
2398	}
2399	p.resetInsertionMode()
2400
2401	for n := context; n != nil; n = n.Parent {
2402		if n.Type == ElementNode && n.DataAtom == a.Form {
2403			p.form = n
2404			break
2405		}
2406	}
2407
2408	if err := p.parse(); err != nil {
2409		return nil, err
2410	}
2411
2412	parent := p.doc
2413	if context != nil {
2414		parent = root
2415	}
2416
2417	var result []*Node
2418	for c := parent.FirstChild; c != nil; {
2419		next := c.NextSibling
2420		parent.RemoveChild(c)
2421		result = append(result, c)
2422		c = next
2423	}
2424	return result, nil
2425}
2426