1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package html 6 7import ( 8 "errors" 9 "fmt" 10 "io" 11 "strings" 12 13 a "golang.org/x/net/html/atom" 14) 15 16// A parser implements the HTML5 parsing algorithm: 17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction 18type parser struct { 19 // tokenizer provides the tokens for the parser. 20 tokenizer *Tokenizer 21 // tok is the most recently read token. 22 tok Token 23 // Self-closing tags like <hr/> are treated as start tags, except that 24 // hasSelfClosingToken is set while they are being processed. 25 hasSelfClosingToken bool 26 // doc is the document root element. 27 doc *Node 28 // The stack of open elements (section 12.2.4.2) and active formatting 29 // elements (section 12.2.4.3). 30 oe, afe nodeStack 31 // Element pointers (section 12.2.4.4). 32 head, form *Node 33 // Other parsing state flags (section 12.2.4.5). 34 scripting, framesetOK bool 35 // The stack of template insertion modes 36 templateStack insertionModeStack 37 // im is the current insertion mode. 38 im insertionMode 39 // originalIM is the insertion mode to go back to after completing a text 40 // or inTableText insertion mode. 41 originalIM insertionMode 42 // fosterParenting is whether new elements should be inserted according to 43 // the foster parenting rules (section 12.2.6.1). 44 fosterParenting bool 45 // quirks is whether the parser is operating in "quirks mode." 46 quirks bool 47 // fragment is whether the parser is parsing an HTML fragment. 48 fragment bool 49 // context is the context element when parsing an HTML fragment 50 // (section 12.4). 51 context *Node 52} 53 54func (p *parser) top() *Node { 55 if n := p.oe.top(); n != nil { 56 return n 57 } 58 return p.doc 59} 60 61// Stop tags for use in popUntil. These come from section 12.2.4.2. 62var ( 63 defaultScopeStopTags = map[string][]a.Atom{ 64 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, 65 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, 66 "svg": {a.Desc, a.ForeignObject, a.Title}, 67 } 68) 69 70type scope int 71 72const ( 73 defaultScope scope = iota 74 listItemScope 75 buttonScope 76 tableScope 77 tableRowScope 78 tableBodyScope 79 selectScope 80) 81 82// popUntil pops the stack of open elements at the highest element whose tag 83// is in matchTags, provided there is no higher element in the scope's stop 84// tags (as defined in section 12.2.4.2). It returns whether or not there was 85// such an element. If there was not, popUntil leaves the stack unchanged. 86// 87// For example, the set of stop tags for table scope is: "html", "table". If 88// the stack was: 89// ["html", "body", "font", "table", "b", "i", "u"] 90// then popUntil(tableScope, "font") would return false, but 91// popUntil(tableScope, "i") would return true and the stack would become: 92// ["html", "body", "font", "table", "b"] 93// 94// If an element's tag is in both the stop tags and matchTags, then the stack 95// will be popped and the function returns true (provided, of course, there was 96// no higher element in the stack that was also in the stop tags). For example, 97// popUntil(tableScope, "table") returns true and leaves: 98// ["html", "body", "font"] 99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { 100 if i := p.indexOfElementInScope(s, matchTags...); i != -1 { 101 p.oe = p.oe[:i] 102 return true 103 } 104 return false 105} 106 107// indexOfElementInScope returns the index in p.oe of the highest element whose 108// tag is in matchTags that is in scope. If no matching element is in scope, it 109// returns -1. 110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { 111 for i := len(p.oe) - 1; i >= 0; i-- { 112 tagAtom := p.oe[i].DataAtom 113 if p.oe[i].Namespace == "" { 114 for _, t := range matchTags { 115 if t == tagAtom { 116 return i 117 } 118 } 119 switch s { 120 case defaultScope: 121 // No-op. 122 case listItemScope: 123 if tagAtom == a.Ol || tagAtom == a.Ul { 124 return -1 125 } 126 case buttonScope: 127 if tagAtom == a.Button { 128 return -1 129 } 130 case tableScope: 131 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { 132 return -1 133 } 134 case selectScope: 135 if tagAtom != a.Optgroup && tagAtom != a.Option { 136 return -1 137 } 138 default: 139 panic("unreachable") 140 } 141 } 142 switch s { 143 case defaultScope, listItemScope, buttonScope: 144 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { 145 if t == tagAtom { 146 return -1 147 } 148 } 149 } 150 } 151 return -1 152} 153 154// elementInScope is like popUntil, except that it doesn't modify the stack of 155// open elements. 156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { 157 return p.indexOfElementInScope(s, matchTags...) != -1 158} 159 160// clearStackToContext pops elements off the stack of open elements until a 161// scope-defined element is found. 162func (p *parser) clearStackToContext(s scope) { 163 for i := len(p.oe) - 1; i >= 0; i-- { 164 tagAtom := p.oe[i].DataAtom 165 switch s { 166 case tableScope: 167 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { 168 p.oe = p.oe[:i+1] 169 return 170 } 171 case tableRowScope: 172 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template { 173 p.oe = p.oe[:i+1] 174 return 175 } 176 case tableBodyScope: 177 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template { 178 p.oe = p.oe[:i+1] 179 return 180 } 181 default: 182 panic("unreachable") 183 } 184 } 185} 186 187// generateImpliedEndTags pops nodes off the stack of open elements as long as 188// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. 189// If exceptions are specified, nodes with that name will not be popped off. 190func (p *parser) generateImpliedEndTags(exceptions ...string) { 191 var i int 192loop: 193 for i = len(p.oe) - 1; i >= 0; i-- { 194 n := p.oe[i] 195 if n.Type == ElementNode { 196 switch n.DataAtom { 197 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: 198 for _, except := range exceptions { 199 if n.Data == except { 200 break loop 201 } 202 } 203 continue 204 } 205 } 206 break 207 } 208 209 p.oe = p.oe[:i+1] 210} 211 212// addChild adds a child node n to the top element, and pushes n onto the stack 213// of open elements if it is an element node. 214func (p *parser) addChild(n *Node) { 215 if p.shouldFosterParent() { 216 p.fosterParent(n) 217 } else { 218 p.top().AppendChild(n) 219 } 220 221 if n.Type == ElementNode { 222 p.oe = append(p.oe, n) 223 } 224} 225 226// shouldFosterParent returns whether the next node to be added should be 227// foster parented. 228func (p *parser) shouldFosterParent() bool { 229 if p.fosterParenting { 230 switch p.top().DataAtom { 231 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: 232 return true 233 } 234 } 235 return false 236} 237 238// fosterParent adds a child node according to the foster parenting rules. 239// Section 12.2.6.1, "foster parenting". 240func (p *parser) fosterParent(n *Node) { 241 var table, parent, prev, template *Node 242 var i int 243 for i = len(p.oe) - 1; i >= 0; i-- { 244 if p.oe[i].DataAtom == a.Table { 245 table = p.oe[i] 246 break 247 } 248 } 249 250 var j int 251 for j = len(p.oe) - 1; j >= 0; j-- { 252 if p.oe[j].DataAtom == a.Template { 253 template = p.oe[j] 254 break 255 } 256 } 257 258 if template != nil && (table == nil || j > i) { 259 template.AppendChild(n) 260 return 261 } 262 263 if table == nil { 264 // The foster parent is the html element. 265 parent = p.oe[0] 266 } else { 267 parent = table.Parent 268 } 269 if parent == nil { 270 parent = p.oe[i-1] 271 } 272 273 if table != nil { 274 prev = table.PrevSibling 275 } else { 276 prev = parent.LastChild 277 } 278 if prev != nil && prev.Type == TextNode && n.Type == TextNode { 279 prev.Data += n.Data 280 return 281 } 282 283 parent.InsertBefore(n, table) 284} 285 286// addText adds text to the preceding node if it is a text node, or else it 287// calls addChild with a new text node. 288func (p *parser) addText(text string) { 289 if text == "" { 290 return 291 } 292 293 if p.shouldFosterParent() { 294 p.fosterParent(&Node{ 295 Type: TextNode, 296 Data: text, 297 }) 298 return 299 } 300 301 t := p.top() 302 if n := t.LastChild; n != nil && n.Type == TextNode { 303 n.Data += text 304 return 305 } 306 p.addChild(&Node{ 307 Type: TextNode, 308 Data: text, 309 }) 310} 311 312// addElement adds a child element based on the current token. 313func (p *parser) addElement() { 314 p.addChild(&Node{ 315 Type: ElementNode, 316 DataAtom: p.tok.DataAtom, 317 Data: p.tok.Data, 318 Attr: p.tok.Attr, 319 }) 320} 321 322// Section 12.2.4.3. 323func (p *parser) addFormattingElement() { 324 tagAtom, attr := p.tok.DataAtom, p.tok.Attr 325 p.addElement() 326 327 // Implement the Noah's Ark clause, but with three per family instead of two. 328 identicalElements := 0 329findIdenticalElements: 330 for i := len(p.afe) - 1; i >= 0; i-- { 331 n := p.afe[i] 332 if n.Type == scopeMarkerNode { 333 break 334 } 335 if n.Type != ElementNode { 336 continue 337 } 338 if n.Namespace != "" { 339 continue 340 } 341 if n.DataAtom != tagAtom { 342 continue 343 } 344 if len(n.Attr) != len(attr) { 345 continue 346 } 347 compareAttributes: 348 for _, t0 := range n.Attr { 349 for _, t1 := range attr { 350 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { 351 // Found a match for this attribute, continue with the next attribute. 352 continue compareAttributes 353 } 354 } 355 // If we get here, there is no attribute that matches a. 356 // Therefore the element is not identical to the new one. 357 continue findIdenticalElements 358 } 359 360 identicalElements++ 361 if identicalElements >= 3 { 362 p.afe.remove(n) 363 } 364 } 365 366 p.afe = append(p.afe, p.top()) 367} 368 369// Section 12.2.4.3. 370func (p *parser) clearActiveFormattingElements() { 371 for { 372 n := p.afe.pop() 373 if len(p.afe) == 0 || n.Type == scopeMarkerNode { 374 return 375 } 376 } 377} 378 379// Section 12.2.4.3. 380func (p *parser) reconstructActiveFormattingElements() { 381 n := p.afe.top() 382 if n == nil { 383 return 384 } 385 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { 386 return 387 } 388 i := len(p.afe) - 1 389 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { 390 if i == 0 { 391 i = -1 392 break 393 } 394 i-- 395 n = p.afe[i] 396 } 397 for { 398 i++ 399 clone := p.afe[i].clone() 400 p.addChild(clone) 401 p.afe[i] = clone 402 if i == len(p.afe)-1 { 403 break 404 } 405 } 406} 407 408// Section 12.2.5. 409func (p *parser) acknowledgeSelfClosingTag() { 410 p.hasSelfClosingToken = false 411} 412 413// An insertion mode (section 12.2.4.1) is the state transition function from 414// a particular state in the HTML5 parser's state machine. It updates the 415// parser's fields depending on parser.tok (where ErrorToken means EOF). 416// It returns whether the token was consumed. 417type insertionMode func(*parser) bool 418 419// setOriginalIM sets the insertion mode to return to after completing a text or 420// inTableText insertion mode. 421// Section 12.2.4.1, "using the rules for". 422func (p *parser) setOriginalIM() { 423 if p.originalIM != nil { 424 panic("html: bad parser state: originalIM was set twice") 425 } 426 p.originalIM = p.im 427} 428 429// Section 12.2.4.1, "reset the insertion mode". 430func (p *parser) resetInsertionMode() { 431 for i := len(p.oe) - 1; i >= 0; i-- { 432 n := p.oe[i] 433 last := i == 0 434 if last && p.context != nil { 435 n = p.context 436 } 437 438 switch n.DataAtom { 439 case a.Select: 440 if !last { 441 for ancestor, first := n, p.oe[0]; ancestor != first; { 442 if ancestor == first { 443 break 444 } 445 ancestor = p.oe[p.oe.index(ancestor)-1] 446 switch ancestor.DataAtom { 447 case a.Template: 448 p.im = inSelectIM 449 return 450 case a.Table: 451 p.im = inSelectInTableIM 452 return 453 } 454 } 455 } 456 p.im = inSelectIM 457 case a.Td, a.Th: 458 // TODO: remove this divergence from the HTML5 spec. 459 // 460 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 461 p.im = inCellIM 462 case a.Tr: 463 p.im = inRowIM 464 case a.Tbody, a.Thead, a.Tfoot: 465 p.im = inTableBodyIM 466 case a.Caption: 467 p.im = inCaptionIM 468 case a.Colgroup: 469 p.im = inColumnGroupIM 470 case a.Table: 471 p.im = inTableIM 472 case a.Template: 473 // TODO: remove this divergence from the HTML5 spec. 474 if n.Namespace != "" { 475 continue 476 } 477 p.im = p.templateStack.top() 478 case a.Head: 479 // TODO: remove this divergence from the HTML5 spec. 480 // 481 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 482 p.im = inHeadIM 483 case a.Body: 484 p.im = inBodyIM 485 case a.Frameset: 486 p.im = inFramesetIM 487 case a.Html: 488 if p.head == nil { 489 p.im = beforeHeadIM 490 } else { 491 p.im = afterHeadIM 492 } 493 default: 494 if last { 495 p.im = inBodyIM 496 return 497 } 498 continue 499 } 500 return 501 } 502} 503 504const whitespace = " \t\r\n\f" 505 506// Section 12.2.6.4.1. 507func initialIM(p *parser) bool { 508 switch p.tok.Type { 509 case TextToken: 510 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) 511 if len(p.tok.Data) == 0 { 512 // It was all whitespace, so ignore it. 513 return true 514 } 515 case CommentToken: 516 p.doc.AppendChild(&Node{ 517 Type: CommentNode, 518 Data: p.tok.Data, 519 }) 520 return true 521 case DoctypeToken: 522 n, quirks := parseDoctype(p.tok.Data) 523 p.doc.AppendChild(n) 524 p.quirks = quirks 525 p.im = beforeHTMLIM 526 return true 527 } 528 p.quirks = true 529 p.im = beforeHTMLIM 530 return false 531} 532 533// Section 12.2.6.4.2. 534func beforeHTMLIM(p *parser) bool { 535 switch p.tok.Type { 536 case DoctypeToken: 537 // Ignore the token. 538 return true 539 case TextToken: 540 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) 541 if len(p.tok.Data) == 0 { 542 // It was all whitespace, so ignore it. 543 return true 544 } 545 case StartTagToken: 546 if p.tok.DataAtom == a.Html { 547 p.addElement() 548 p.im = beforeHeadIM 549 return true 550 } 551 case EndTagToken: 552 switch p.tok.DataAtom { 553 case a.Head, a.Body, a.Html, a.Br: 554 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) 555 return false 556 default: 557 // Ignore the token. 558 return true 559 } 560 case CommentToken: 561 p.doc.AppendChild(&Node{ 562 Type: CommentNode, 563 Data: p.tok.Data, 564 }) 565 return true 566 } 567 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) 568 return false 569} 570 571// Section 12.2.6.4.3. 572func beforeHeadIM(p *parser) bool { 573 switch p.tok.Type { 574 case TextToken: 575 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) 576 if len(p.tok.Data) == 0 { 577 // It was all whitespace, so ignore it. 578 return true 579 } 580 case StartTagToken: 581 switch p.tok.DataAtom { 582 case a.Head: 583 p.addElement() 584 p.head = p.top() 585 p.im = inHeadIM 586 return true 587 case a.Html: 588 return inBodyIM(p) 589 } 590 case EndTagToken: 591 switch p.tok.DataAtom { 592 case a.Head, a.Body, a.Html, a.Br: 593 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) 594 return false 595 default: 596 // Ignore the token. 597 return true 598 } 599 case CommentToken: 600 p.addChild(&Node{ 601 Type: CommentNode, 602 Data: p.tok.Data, 603 }) 604 return true 605 case DoctypeToken: 606 // Ignore the token. 607 return true 608 } 609 610 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) 611 return false 612} 613 614// Section 12.2.6.4.4. 615func inHeadIM(p *parser) bool { 616 switch p.tok.Type { 617 case TextToken: 618 s := strings.TrimLeft(p.tok.Data, whitespace) 619 if len(s) < len(p.tok.Data) { 620 // Add the initial whitespace to the current node. 621 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) 622 if s == "" { 623 return true 624 } 625 p.tok.Data = s 626 } 627 case StartTagToken: 628 switch p.tok.DataAtom { 629 case a.Html: 630 return inBodyIM(p) 631 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: 632 p.addElement() 633 p.oe.pop() 634 p.acknowledgeSelfClosingTag() 635 return true 636 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: 637 p.addElement() 638 p.setOriginalIM() 639 p.im = textIM 640 return true 641 case a.Head: 642 // Ignore the token. 643 return true 644 case a.Template: 645 p.addElement() 646 p.afe = append(p.afe, &scopeMarker) 647 p.framesetOK = false 648 p.im = inTemplateIM 649 p.templateStack = append(p.templateStack, inTemplateIM) 650 return true 651 } 652 case EndTagToken: 653 switch p.tok.DataAtom { 654 case a.Head: 655 p.oe.pop() 656 p.im = afterHeadIM 657 return true 658 case a.Body, a.Html, a.Br: 659 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) 660 return false 661 case a.Template: 662 if !p.oe.contains(a.Template) { 663 return true 664 } 665 // TODO: remove this divergence from the HTML5 spec. 666 // 667 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 668 p.generateImpliedEndTags() 669 for i := len(p.oe) - 1; i >= 0; i-- { 670 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { 671 p.oe = p.oe[:i] 672 break 673 } 674 } 675 p.clearActiveFormattingElements() 676 p.templateStack.pop() 677 p.resetInsertionMode() 678 return true 679 default: 680 // Ignore the token. 681 return true 682 } 683 case CommentToken: 684 p.addChild(&Node{ 685 Type: CommentNode, 686 Data: p.tok.Data, 687 }) 688 return true 689 case DoctypeToken: 690 // Ignore the token. 691 return true 692 } 693 694 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) 695 return false 696} 697 698// Section 12.2.6.4.6. 699func afterHeadIM(p *parser) bool { 700 switch p.tok.Type { 701 case TextToken: 702 s := strings.TrimLeft(p.tok.Data, whitespace) 703 if len(s) < len(p.tok.Data) { 704 // Add the initial whitespace to the current node. 705 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) 706 if s == "" { 707 return true 708 } 709 p.tok.Data = s 710 } 711 case StartTagToken: 712 switch p.tok.DataAtom { 713 case a.Html: 714 return inBodyIM(p) 715 case a.Body: 716 p.addElement() 717 p.framesetOK = false 718 p.im = inBodyIM 719 return true 720 case a.Frameset: 721 p.addElement() 722 p.im = inFramesetIM 723 return true 724 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: 725 p.oe = append(p.oe, p.head) 726 defer p.oe.remove(p.head) 727 return inHeadIM(p) 728 case a.Head: 729 // Ignore the token. 730 return true 731 } 732 case EndTagToken: 733 switch p.tok.DataAtom { 734 case a.Body, a.Html, a.Br: 735 // Drop down to creating an implied <body> tag. 736 case a.Template: 737 return inHeadIM(p) 738 default: 739 // Ignore the token. 740 return true 741 } 742 case CommentToken: 743 p.addChild(&Node{ 744 Type: CommentNode, 745 Data: p.tok.Data, 746 }) 747 return true 748 case DoctypeToken: 749 // Ignore the token. 750 return true 751 } 752 753 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) 754 p.framesetOK = true 755 return false 756} 757 758// copyAttributes copies attributes of src not found on dst to dst. 759func copyAttributes(dst *Node, src Token) { 760 if len(src.Attr) == 0 { 761 return 762 } 763 attr := map[string]string{} 764 for _, t := range dst.Attr { 765 attr[t.Key] = t.Val 766 } 767 for _, t := range src.Attr { 768 if _, ok := attr[t.Key]; !ok { 769 dst.Attr = append(dst.Attr, t) 770 attr[t.Key] = t.Val 771 } 772 } 773} 774 775// Section 12.2.6.4.7. 776func inBodyIM(p *parser) bool { 777 switch p.tok.Type { 778 case TextToken: 779 d := p.tok.Data 780 switch n := p.oe.top(); n.DataAtom { 781 case a.Pre, a.Listing: 782 if n.FirstChild == nil { 783 // Ignore a newline at the start of a <pre> block. 784 if d != "" && d[0] == '\r' { 785 d = d[1:] 786 } 787 if d != "" && d[0] == '\n' { 788 d = d[1:] 789 } 790 } 791 } 792 d = strings.Replace(d, "\x00", "", -1) 793 if d == "" { 794 return true 795 } 796 p.reconstructActiveFormattingElements() 797 p.addText(d) 798 if p.framesetOK && strings.TrimLeft(d, whitespace) != "" { 799 // There were non-whitespace characters inserted. 800 p.framesetOK = false 801 } 802 case StartTagToken: 803 switch p.tok.DataAtom { 804 case a.Html: 805 if p.oe.contains(a.Template) { 806 return true 807 } 808 copyAttributes(p.oe[0], p.tok) 809 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: 810 return inHeadIM(p) 811 case a.Body: 812 if p.oe.contains(a.Template) { 813 return true 814 } 815 if len(p.oe) >= 2 { 816 body := p.oe[1] 817 if body.Type == ElementNode && body.DataAtom == a.Body { 818 p.framesetOK = false 819 copyAttributes(body, p.tok) 820 } 821 } 822 case a.Frameset: 823 if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body { 824 // Ignore the token. 825 return true 826 } 827 body := p.oe[1] 828 if body.Parent != nil { 829 body.Parent.RemoveChild(body) 830 } 831 p.oe = p.oe[:1] 832 p.addElement() 833 p.im = inFramesetIM 834 return true 835 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: 836 p.popUntil(buttonScope, a.P) 837 p.addElement() 838 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: 839 p.popUntil(buttonScope, a.P) 840 switch n := p.top(); n.DataAtom { 841 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: 842 p.oe.pop() 843 } 844 p.addElement() 845 case a.Pre, a.Listing: 846 p.popUntil(buttonScope, a.P) 847 p.addElement() 848 // The newline, if any, will be dealt with by the TextToken case. 849 p.framesetOK = false 850 case a.Form: 851 if p.form != nil && !p.oe.contains(a.Template) { 852 // Ignore the token 853 return true 854 } 855 p.popUntil(buttonScope, a.P) 856 p.addElement() 857 if !p.oe.contains(a.Template) { 858 p.form = p.top() 859 } 860 case a.Li: 861 p.framesetOK = false 862 for i := len(p.oe) - 1; i >= 0; i-- { 863 node := p.oe[i] 864 switch node.DataAtom { 865 case a.Li: 866 p.oe = p.oe[:i] 867 case a.Address, a.Div, a.P: 868 continue 869 default: 870 if !isSpecialElement(node) { 871 continue 872 } 873 } 874 break 875 } 876 p.popUntil(buttonScope, a.P) 877 p.addElement() 878 case a.Dd, a.Dt: 879 p.framesetOK = false 880 for i := len(p.oe) - 1; i >= 0; i-- { 881 node := p.oe[i] 882 switch node.DataAtom { 883 case a.Dd, a.Dt: 884 p.oe = p.oe[:i] 885 case a.Address, a.Div, a.P: 886 continue 887 default: 888 if !isSpecialElement(node) { 889 continue 890 } 891 } 892 break 893 } 894 p.popUntil(buttonScope, a.P) 895 p.addElement() 896 case a.Plaintext: 897 p.popUntil(buttonScope, a.P) 898 p.addElement() 899 case a.Button: 900 p.popUntil(defaultScope, a.Button) 901 p.reconstructActiveFormattingElements() 902 p.addElement() 903 p.framesetOK = false 904 case a.A: 905 for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- { 906 if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A { 907 p.inBodyEndTagFormatting(a.A) 908 p.oe.remove(n) 909 p.afe.remove(n) 910 break 911 } 912 } 913 p.reconstructActiveFormattingElements() 914 p.addFormattingElement() 915 case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: 916 p.reconstructActiveFormattingElements() 917 p.addFormattingElement() 918 case a.Nobr: 919 p.reconstructActiveFormattingElements() 920 if p.elementInScope(defaultScope, a.Nobr) { 921 p.inBodyEndTagFormatting(a.Nobr) 922 p.reconstructActiveFormattingElements() 923 } 924 p.addFormattingElement() 925 case a.Applet, a.Marquee, a.Object: 926 p.reconstructActiveFormattingElements() 927 p.addElement() 928 p.afe = append(p.afe, &scopeMarker) 929 p.framesetOK = false 930 case a.Table: 931 if !p.quirks { 932 p.popUntil(buttonScope, a.P) 933 } 934 p.addElement() 935 p.framesetOK = false 936 p.im = inTableIM 937 return true 938 case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr: 939 p.reconstructActiveFormattingElements() 940 p.addElement() 941 p.oe.pop() 942 p.acknowledgeSelfClosingTag() 943 if p.tok.DataAtom == a.Input { 944 for _, t := range p.tok.Attr { 945 if t.Key == "type" { 946 if strings.ToLower(t.Val) == "hidden" { 947 // Skip setting framesetOK = false 948 return true 949 } 950 } 951 } 952 } 953 p.framesetOK = false 954 case a.Param, a.Source, a.Track: 955 p.addElement() 956 p.oe.pop() 957 p.acknowledgeSelfClosingTag() 958 case a.Hr: 959 p.popUntil(buttonScope, a.P) 960 p.addElement() 961 p.oe.pop() 962 p.acknowledgeSelfClosingTag() 963 p.framesetOK = false 964 case a.Image: 965 p.tok.DataAtom = a.Img 966 p.tok.Data = a.Img.String() 967 return false 968 case a.Isindex: 969 if p.form != nil { 970 // Ignore the token. 971 return true 972 } 973 action := "" 974 prompt := "This is a searchable index. Enter search keywords: " 975 attr := []Attribute{{Key: "name", Val: "isindex"}} 976 for _, t := range p.tok.Attr { 977 switch t.Key { 978 case "action": 979 action = t.Val 980 case "name": 981 // Ignore the attribute. 982 case "prompt": 983 prompt = t.Val 984 default: 985 attr = append(attr, t) 986 } 987 } 988 p.acknowledgeSelfClosingTag() 989 p.popUntil(buttonScope, a.P) 990 p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) 991 if p.form == nil { 992 // NOTE: The 'isindex' element has been removed, 993 // and the 'template' element has not been designed to be 994 // collaborative with the index element. 995 // 996 // Ignore the token. 997 return true 998 } 999 if action != "" { 1000 p.form.Attr = []Attribute{{Key: "action", Val: action}} 1001 } 1002 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) 1003 p.parseImpliedToken(StartTagToken, a.Label, a.Label.String()) 1004 p.addText(prompt) 1005 p.addChild(&Node{ 1006 Type: ElementNode, 1007 DataAtom: a.Input, 1008 Data: a.Input.String(), 1009 Attr: attr, 1010 }) 1011 p.oe.pop() 1012 p.parseImpliedToken(EndTagToken, a.Label, a.Label.String()) 1013 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) 1014 p.parseImpliedToken(EndTagToken, a.Form, a.Form.String()) 1015 case a.Textarea: 1016 p.addElement() 1017 p.setOriginalIM() 1018 p.framesetOK = false 1019 p.im = textIM 1020 case a.Xmp: 1021 p.popUntil(buttonScope, a.P) 1022 p.reconstructActiveFormattingElements() 1023 p.framesetOK = false 1024 p.addElement() 1025 p.setOriginalIM() 1026 p.im = textIM 1027 case a.Iframe: 1028 p.framesetOK = false 1029 p.addElement() 1030 p.setOriginalIM() 1031 p.im = textIM 1032 case a.Noembed, a.Noscript: 1033 p.addElement() 1034 p.setOriginalIM() 1035 p.im = textIM 1036 case a.Select: 1037 p.reconstructActiveFormattingElements() 1038 p.addElement() 1039 p.framesetOK = false 1040 p.im = inSelectIM 1041 return true 1042 case a.Optgroup, a.Option: 1043 if p.top().DataAtom == a.Option { 1044 p.oe.pop() 1045 } 1046 p.reconstructActiveFormattingElements() 1047 p.addElement() 1048 case a.Rb, a.Rtc: 1049 if p.elementInScope(defaultScope, a.Ruby) { 1050 p.generateImpliedEndTags() 1051 } 1052 p.addElement() 1053 case a.Rp, a.Rt: 1054 if p.elementInScope(defaultScope, a.Ruby) { 1055 p.generateImpliedEndTags("rtc") 1056 } 1057 p.addElement() 1058 case a.Math, a.Svg: 1059 p.reconstructActiveFormattingElements() 1060 if p.tok.DataAtom == a.Math { 1061 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) 1062 } else { 1063 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) 1064 } 1065 adjustForeignAttributes(p.tok.Attr) 1066 p.addElement() 1067 p.top().Namespace = p.tok.Data 1068 if p.hasSelfClosingToken { 1069 p.oe.pop() 1070 p.acknowledgeSelfClosingTag() 1071 } 1072 return true 1073 case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: 1074 // Ignore the token. 1075 default: 1076 p.reconstructActiveFormattingElements() 1077 p.addElement() 1078 } 1079 case EndTagToken: 1080 switch p.tok.DataAtom { 1081 case a.Body: 1082 if p.elementInScope(defaultScope, a.Body) { 1083 p.im = afterBodyIM 1084 } 1085 case a.Html: 1086 if p.elementInScope(defaultScope, a.Body) { 1087 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String()) 1088 return false 1089 } 1090 return true 1091 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: 1092 p.popUntil(defaultScope, p.tok.DataAtom) 1093 case a.Form: 1094 if p.oe.contains(a.Template) { 1095 i := p.indexOfElementInScope(defaultScope, a.Form) 1096 if i == -1 { 1097 // Ignore the token. 1098 return true 1099 } 1100 p.generateImpliedEndTags() 1101 if p.oe[i].DataAtom != a.Form { 1102 // Ignore the token. 1103 return true 1104 } 1105 p.popUntil(defaultScope, a.Form) 1106 } else { 1107 node := p.form 1108 p.form = nil 1109 i := p.indexOfElementInScope(defaultScope, a.Form) 1110 if node == nil || i == -1 || p.oe[i] != node { 1111 // Ignore the token. 1112 return true 1113 } 1114 p.generateImpliedEndTags() 1115 p.oe.remove(node) 1116 } 1117 case a.P: 1118 if !p.elementInScope(buttonScope, a.P) { 1119 p.parseImpliedToken(StartTagToken, a.P, a.P.String()) 1120 } 1121 p.popUntil(buttonScope, a.P) 1122 case a.Li: 1123 p.popUntil(listItemScope, a.Li) 1124 case a.Dd, a.Dt: 1125 p.popUntil(defaultScope, p.tok.DataAtom) 1126 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: 1127 p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) 1128 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: 1129 p.inBodyEndTagFormatting(p.tok.DataAtom) 1130 case a.Applet, a.Marquee, a.Object: 1131 if p.popUntil(defaultScope, p.tok.DataAtom) { 1132 p.clearActiveFormattingElements() 1133 } 1134 case a.Br: 1135 p.tok.Type = StartTagToken 1136 return false 1137 case a.Template: 1138 return inHeadIM(p) 1139 default: 1140 p.inBodyEndTagOther(p.tok.DataAtom) 1141 } 1142 case CommentToken: 1143 p.addChild(&Node{ 1144 Type: CommentNode, 1145 Data: p.tok.Data, 1146 }) 1147 case ErrorToken: 1148 // TODO: remove this divergence from the HTML5 spec. 1149 if len(p.templateStack) > 0 { 1150 p.im = inTemplateIM 1151 return false 1152 } else { 1153 for _, e := range p.oe { 1154 switch e.DataAtom { 1155 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, 1156 a.Thead, a.Tr, a.Body, a.Html: 1157 default: 1158 return true 1159 } 1160 } 1161 } 1162 } 1163 1164 return true 1165} 1166 1167func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { 1168 // This is the "adoption agency" algorithm, described at 1169 // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency 1170 1171 // TODO: this is a fairly literal line-by-line translation of that algorithm. 1172 // Once the code successfully parses the comprehensive test suite, we should 1173 // refactor this code to be more idiomatic. 1174 1175 // Steps 1-4. The outer loop. 1176 for i := 0; i < 8; i++ { 1177 // Step 5. Find the formatting element. 1178 var formattingElement *Node 1179 for j := len(p.afe) - 1; j >= 0; j-- { 1180 if p.afe[j].Type == scopeMarkerNode { 1181 break 1182 } 1183 if p.afe[j].DataAtom == tagAtom { 1184 formattingElement = p.afe[j] 1185 break 1186 } 1187 } 1188 if formattingElement == nil { 1189 p.inBodyEndTagOther(tagAtom) 1190 return 1191 } 1192 feIndex := p.oe.index(formattingElement) 1193 if feIndex == -1 { 1194 p.afe.remove(formattingElement) 1195 return 1196 } 1197 if !p.elementInScope(defaultScope, tagAtom) { 1198 // Ignore the tag. 1199 return 1200 } 1201 1202 // Steps 9-10. Find the furthest block. 1203 var furthestBlock *Node 1204 for _, e := range p.oe[feIndex:] { 1205 if isSpecialElement(e) { 1206 furthestBlock = e 1207 break 1208 } 1209 } 1210 if furthestBlock == nil { 1211 e := p.oe.pop() 1212 for e != formattingElement { 1213 e = p.oe.pop() 1214 } 1215 p.afe.remove(e) 1216 return 1217 } 1218 1219 // Steps 11-12. Find the common ancestor and bookmark node. 1220 commonAncestor := p.oe[feIndex-1] 1221 bookmark := p.afe.index(formattingElement) 1222 1223 // Step 13. The inner loop. Find the lastNode to reparent. 1224 lastNode := furthestBlock 1225 node := furthestBlock 1226 x := p.oe.index(node) 1227 // Steps 13.1-13.2 1228 for j := 0; j < 3; j++ { 1229 // Step 13.3. 1230 x-- 1231 node = p.oe[x] 1232 // Step 13.4 - 13.5. 1233 if p.afe.index(node) == -1 { 1234 p.oe.remove(node) 1235 continue 1236 } 1237 // Step 13.6. 1238 if node == formattingElement { 1239 break 1240 } 1241 // Step 13.7. 1242 clone := node.clone() 1243 p.afe[p.afe.index(node)] = clone 1244 p.oe[p.oe.index(node)] = clone 1245 node = clone 1246 // Step 13.8. 1247 if lastNode == furthestBlock { 1248 bookmark = p.afe.index(node) + 1 1249 } 1250 // Step 13.9. 1251 if lastNode.Parent != nil { 1252 lastNode.Parent.RemoveChild(lastNode) 1253 } 1254 node.AppendChild(lastNode) 1255 // Step 13.10. 1256 lastNode = node 1257 } 1258 1259 // Step 14. Reparent lastNode to the common ancestor, 1260 // or for misnested table nodes, to the foster parent. 1261 if lastNode.Parent != nil { 1262 lastNode.Parent.RemoveChild(lastNode) 1263 } 1264 switch commonAncestor.DataAtom { 1265 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: 1266 p.fosterParent(lastNode) 1267 default: 1268 commonAncestor.AppendChild(lastNode) 1269 } 1270 1271 // Steps 15-17. Reparent nodes from the furthest block's children 1272 // to a clone of the formatting element. 1273 clone := formattingElement.clone() 1274 reparentChildren(clone, furthestBlock) 1275 furthestBlock.AppendChild(clone) 1276 1277 // Step 18. Fix up the list of active formatting elements. 1278 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { 1279 // Move the bookmark with the rest of the list. 1280 bookmark-- 1281 } 1282 p.afe.remove(formattingElement) 1283 p.afe.insert(bookmark, clone) 1284 1285 // Step 19. Fix up the stack of open elements. 1286 p.oe.remove(formattingElement) 1287 p.oe.insert(p.oe.index(furthestBlock)+1, clone) 1288 } 1289} 1290 1291// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. 1292// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content 1293// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign 1294func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { 1295 for i := len(p.oe) - 1; i >= 0; i-- { 1296 if p.oe[i].DataAtom == tagAtom { 1297 p.oe = p.oe[:i] 1298 break 1299 } 1300 if isSpecialElement(p.oe[i]) { 1301 break 1302 } 1303 } 1304} 1305 1306// Section 12.2.6.4.8. 1307func textIM(p *parser) bool { 1308 switch p.tok.Type { 1309 case ErrorToken: 1310 p.oe.pop() 1311 case TextToken: 1312 d := p.tok.Data 1313 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil { 1314 // Ignore a newline at the start of a <textarea> block. 1315 if d != "" && d[0] == '\r' { 1316 d = d[1:] 1317 } 1318 if d != "" && d[0] == '\n' { 1319 d = d[1:] 1320 } 1321 } 1322 if d == "" { 1323 return true 1324 } 1325 p.addText(d) 1326 return true 1327 case EndTagToken: 1328 p.oe.pop() 1329 } 1330 p.im = p.originalIM 1331 p.originalIM = nil 1332 return p.tok.Type == EndTagToken 1333} 1334 1335// Section 12.2.6.4.9. 1336func inTableIM(p *parser) bool { 1337 switch p.tok.Type { 1338 case TextToken: 1339 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) 1340 switch p.oe.top().DataAtom { 1341 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: 1342 if strings.Trim(p.tok.Data, whitespace) == "" { 1343 p.addText(p.tok.Data) 1344 return true 1345 } 1346 } 1347 case StartTagToken: 1348 switch p.tok.DataAtom { 1349 case a.Caption: 1350 p.clearStackToContext(tableScope) 1351 p.afe = append(p.afe, &scopeMarker) 1352 p.addElement() 1353 p.im = inCaptionIM 1354 return true 1355 case a.Colgroup: 1356 p.clearStackToContext(tableScope) 1357 p.addElement() 1358 p.im = inColumnGroupIM 1359 return true 1360 case a.Col: 1361 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String()) 1362 return false 1363 case a.Tbody, a.Tfoot, a.Thead: 1364 p.clearStackToContext(tableScope) 1365 p.addElement() 1366 p.im = inTableBodyIM 1367 return true 1368 case a.Td, a.Th, a.Tr: 1369 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String()) 1370 return false 1371 case a.Table: 1372 if p.popUntil(tableScope, a.Table) { 1373 p.resetInsertionMode() 1374 return false 1375 } 1376 // Ignore the token. 1377 return true 1378 case a.Style, a.Script, a.Template: 1379 return inHeadIM(p) 1380 case a.Input: 1381 for _, t := range p.tok.Attr { 1382 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" { 1383 p.addElement() 1384 p.oe.pop() 1385 return true 1386 } 1387 } 1388 // Otherwise drop down to the default action. 1389 case a.Form: 1390 if p.oe.contains(a.Template) || p.form != nil { 1391 // Ignore the token. 1392 return true 1393 } 1394 p.addElement() 1395 p.form = p.oe.pop() 1396 case a.Select: 1397 p.reconstructActiveFormattingElements() 1398 switch p.top().DataAtom { 1399 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: 1400 p.fosterParenting = true 1401 } 1402 p.addElement() 1403 p.fosterParenting = false 1404 p.framesetOK = false 1405 p.im = inSelectInTableIM 1406 return true 1407 } 1408 case EndTagToken: 1409 switch p.tok.DataAtom { 1410 case a.Table: 1411 if p.popUntil(tableScope, a.Table) { 1412 p.resetInsertionMode() 1413 return true 1414 } 1415 // Ignore the token. 1416 return true 1417 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: 1418 // Ignore the token. 1419 return true 1420 case a.Template: 1421 return inHeadIM(p) 1422 } 1423 case CommentToken: 1424 p.addChild(&Node{ 1425 Type: CommentNode, 1426 Data: p.tok.Data, 1427 }) 1428 return true 1429 case DoctypeToken: 1430 // Ignore the token. 1431 return true 1432 case ErrorToken: 1433 return inBodyIM(p) 1434 } 1435 1436 p.fosterParenting = true 1437 defer func() { p.fosterParenting = false }() 1438 1439 return inBodyIM(p) 1440} 1441 1442// Section 12.2.6.4.11. 1443func inCaptionIM(p *parser) bool { 1444 switch p.tok.Type { 1445 case StartTagToken: 1446 switch p.tok.DataAtom { 1447 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: 1448 if p.popUntil(tableScope, a.Caption) { 1449 p.clearActiveFormattingElements() 1450 p.im = inTableIM 1451 return false 1452 } else { 1453 // Ignore the token. 1454 return true 1455 } 1456 case a.Select: 1457 p.reconstructActiveFormattingElements() 1458 p.addElement() 1459 p.framesetOK = false 1460 p.im = inSelectInTableIM 1461 return true 1462 } 1463 case EndTagToken: 1464 switch p.tok.DataAtom { 1465 case a.Caption: 1466 if p.popUntil(tableScope, a.Caption) { 1467 p.clearActiveFormattingElements() 1468 p.im = inTableIM 1469 } 1470 return true 1471 case a.Table: 1472 if p.popUntil(tableScope, a.Caption) { 1473 p.clearActiveFormattingElements() 1474 p.im = inTableIM 1475 return false 1476 } else { 1477 // Ignore the token. 1478 return true 1479 } 1480 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: 1481 // Ignore the token. 1482 return true 1483 } 1484 } 1485 return inBodyIM(p) 1486} 1487 1488// Section 12.2.6.4.12. 1489func inColumnGroupIM(p *parser) bool { 1490 switch p.tok.Type { 1491 case TextToken: 1492 s := strings.TrimLeft(p.tok.Data, whitespace) 1493 if len(s) < len(p.tok.Data) { 1494 // Add the initial whitespace to the current node. 1495 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) 1496 if s == "" { 1497 return true 1498 } 1499 p.tok.Data = s 1500 } 1501 case CommentToken: 1502 p.addChild(&Node{ 1503 Type: CommentNode, 1504 Data: p.tok.Data, 1505 }) 1506 return true 1507 case DoctypeToken: 1508 // Ignore the token. 1509 return true 1510 case StartTagToken: 1511 switch p.tok.DataAtom { 1512 case a.Html: 1513 return inBodyIM(p) 1514 case a.Col: 1515 p.addElement() 1516 p.oe.pop() 1517 p.acknowledgeSelfClosingTag() 1518 return true 1519 case a.Template: 1520 return inHeadIM(p) 1521 } 1522 case EndTagToken: 1523 switch p.tok.DataAtom { 1524 case a.Colgroup: 1525 if p.oe.top().DataAtom == a.Colgroup { 1526 p.oe.pop() 1527 p.im = inTableIM 1528 } 1529 return true 1530 case a.Col: 1531 // Ignore the token. 1532 return true 1533 case a.Template: 1534 return inHeadIM(p) 1535 } 1536 case ErrorToken: 1537 return inBodyIM(p) 1538 } 1539 if p.oe.top().DataAtom != a.Colgroup { 1540 return true 1541 } 1542 p.oe.pop() 1543 p.im = inTableIM 1544 return false 1545} 1546 1547// Section 12.2.6.4.13. 1548func inTableBodyIM(p *parser) bool { 1549 switch p.tok.Type { 1550 case StartTagToken: 1551 switch p.tok.DataAtom { 1552 case a.Tr: 1553 p.clearStackToContext(tableBodyScope) 1554 p.addElement() 1555 p.im = inRowIM 1556 return true 1557 case a.Td, a.Th: 1558 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String()) 1559 return false 1560 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead: 1561 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { 1562 p.im = inTableIM 1563 return false 1564 } 1565 // Ignore the token. 1566 return true 1567 } 1568 case EndTagToken: 1569 switch p.tok.DataAtom { 1570 case a.Tbody, a.Tfoot, a.Thead: 1571 if p.elementInScope(tableScope, p.tok.DataAtom) { 1572 p.clearStackToContext(tableBodyScope) 1573 p.oe.pop() 1574 p.im = inTableIM 1575 } 1576 return true 1577 case a.Table: 1578 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { 1579 p.im = inTableIM 1580 return false 1581 } 1582 // Ignore the token. 1583 return true 1584 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr: 1585 // Ignore the token. 1586 return true 1587 } 1588 case CommentToken: 1589 p.addChild(&Node{ 1590 Type: CommentNode, 1591 Data: p.tok.Data, 1592 }) 1593 return true 1594 } 1595 1596 return inTableIM(p) 1597} 1598 1599// Section 12.2.6.4.14. 1600func inRowIM(p *parser) bool { 1601 switch p.tok.Type { 1602 case StartTagToken: 1603 switch p.tok.DataAtom { 1604 case a.Td, a.Th: 1605 p.clearStackToContext(tableRowScope) 1606 p.addElement() 1607 p.afe = append(p.afe, &scopeMarker) 1608 p.im = inCellIM 1609 return true 1610 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr: 1611 if p.popUntil(tableScope, a.Tr) { 1612 p.im = inTableBodyIM 1613 return false 1614 } 1615 // Ignore the token. 1616 return true 1617 } 1618 case EndTagToken: 1619 switch p.tok.DataAtom { 1620 case a.Tr: 1621 if p.popUntil(tableScope, a.Tr) { 1622 p.im = inTableBodyIM 1623 return true 1624 } 1625 // Ignore the token. 1626 return true 1627 case a.Table: 1628 if p.popUntil(tableScope, a.Tr) { 1629 p.im = inTableBodyIM 1630 return false 1631 } 1632 // Ignore the token. 1633 return true 1634 case a.Tbody, a.Tfoot, a.Thead: 1635 if p.elementInScope(tableScope, p.tok.DataAtom) { 1636 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String()) 1637 return false 1638 } 1639 // Ignore the token. 1640 return true 1641 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th: 1642 // Ignore the token. 1643 return true 1644 } 1645 } 1646 1647 return inTableIM(p) 1648} 1649 1650// Section 12.2.6.4.15. 1651func inCellIM(p *parser) bool { 1652 switch p.tok.Type { 1653 case StartTagToken: 1654 switch p.tok.DataAtom { 1655 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: 1656 if p.popUntil(tableScope, a.Td, a.Th) { 1657 // Close the cell and reprocess. 1658 p.clearActiveFormattingElements() 1659 p.im = inRowIM 1660 return false 1661 } 1662 // Ignore the token. 1663 return true 1664 case a.Select: 1665 p.reconstructActiveFormattingElements() 1666 p.addElement() 1667 p.framesetOK = false 1668 p.im = inSelectInTableIM 1669 return true 1670 } 1671 case EndTagToken: 1672 switch p.tok.DataAtom { 1673 case a.Td, a.Th: 1674 if !p.popUntil(tableScope, p.tok.DataAtom) { 1675 // Ignore the token. 1676 return true 1677 } 1678 p.clearActiveFormattingElements() 1679 p.im = inRowIM 1680 return true 1681 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html: 1682 // Ignore the token. 1683 return true 1684 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: 1685 if !p.elementInScope(tableScope, p.tok.DataAtom) { 1686 // Ignore the token. 1687 return true 1688 } 1689 // Close the cell and reprocess. 1690 p.popUntil(tableScope, a.Td, a.Th) 1691 p.clearActiveFormattingElements() 1692 p.im = inRowIM 1693 return false 1694 } 1695 } 1696 return inBodyIM(p) 1697} 1698 1699// Section 12.2.6.4.16. 1700func inSelectIM(p *parser) bool { 1701 switch p.tok.Type { 1702 case TextToken: 1703 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) 1704 case StartTagToken: 1705 switch p.tok.DataAtom { 1706 case a.Html: 1707 return inBodyIM(p) 1708 case a.Option: 1709 if p.top().DataAtom == a.Option { 1710 p.oe.pop() 1711 } 1712 p.addElement() 1713 case a.Optgroup: 1714 if p.top().DataAtom == a.Option { 1715 p.oe.pop() 1716 } 1717 if p.top().DataAtom == a.Optgroup { 1718 p.oe.pop() 1719 } 1720 p.addElement() 1721 case a.Select: 1722 p.tok.Type = EndTagToken 1723 return false 1724 case a.Input, a.Keygen, a.Textarea: 1725 if p.elementInScope(selectScope, a.Select) { 1726 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) 1727 return false 1728 } 1729 // In order to properly ignore <textarea>, we need to change the tokenizer mode. 1730 p.tokenizer.NextIsNotRawText() 1731 // Ignore the token. 1732 return true 1733 case a.Script, a.Template: 1734 return inHeadIM(p) 1735 } 1736 case EndTagToken: 1737 switch p.tok.DataAtom { 1738 case a.Option: 1739 if p.top().DataAtom == a.Option { 1740 p.oe.pop() 1741 } 1742 case a.Optgroup: 1743 i := len(p.oe) - 1 1744 if p.oe[i].DataAtom == a.Option { 1745 i-- 1746 } 1747 if p.oe[i].DataAtom == a.Optgroup { 1748 p.oe = p.oe[:i] 1749 } 1750 case a.Select: 1751 if p.popUntil(selectScope, a.Select) { 1752 p.resetInsertionMode() 1753 } 1754 case a.Template: 1755 return inHeadIM(p) 1756 } 1757 case CommentToken: 1758 p.addChild(&Node{ 1759 Type: CommentNode, 1760 Data: p.tok.Data, 1761 }) 1762 case DoctypeToken: 1763 // Ignore the token. 1764 return true 1765 case ErrorToken: 1766 return inBodyIM(p) 1767 } 1768 1769 return true 1770} 1771 1772// Section 12.2.6.4.17. 1773func inSelectInTableIM(p *parser) bool { 1774 switch p.tok.Type { 1775 case StartTagToken, EndTagToken: 1776 switch p.tok.DataAtom { 1777 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: 1778 if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) { 1779 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) 1780 return false 1781 } else { 1782 // Ignore the token. 1783 return true 1784 } 1785 } 1786 } 1787 return inSelectIM(p) 1788} 1789 1790// Section 12.2.6.4.18. 1791func inTemplateIM(p *parser) bool { 1792 switch p.tok.Type { 1793 case TextToken, CommentToken, DoctypeToken: 1794 return inBodyIM(p) 1795 case StartTagToken: 1796 switch p.tok.DataAtom { 1797 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: 1798 return inHeadIM(p) 1799 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead: 1800 p.templateStack.pop() 1801 p.templateStack = append(p.templateStack, inTableIM) 1802 p.im = inTableIM 1803 return false 1804 case a.Col: 1805 p.templateStack.pop() 1806 p.templateStack = append(p.templateStack, inColumnGroupIM) 1807 p.im = inColumnGroupIM 1808 return false 1809 case a.Tr: 1810 p.templateStack.pop() 1811 p.templateStack = append(p.templateStack, inTableBodyIM) 1812 p.im = inTableBodyIM 1813 return false 1814 case a.Td, a.Th: 1815 p.templateStack.pop() 1816 p.templateStack = append(p.templateStack, inRowIM) 1817 p.im = inRowIM 1818 return false 1819 default: 1820 p.templateStack.pop() 1821 p.templateStack = append(p.templateStack, inBodyIM) 1822 p.im = inBodyIM 1823 return false 1824 } 1825 case EndTagToken: 1826 switch p.tok.DataAtom { 1827 case a.Template: 1828 return inHeadIM(p) 1829 default: 1830 // Ignore the token. 1831 return true 1832 } 1833 case ErrorToken: 1834 if !p.oe.contains(a.Template) { 1835 // Ignore the token. 1836 return true 1837 } 1838 // TODO: remove this divergence from the HTML5 spec. 1839 // 1840 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 1841 p.generateImpliedEndTags() 1842 for i := len(p.oe) - 1; i >= 0; i-- { 1843 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { 1844 p.oe = p.oe[:i] 1845 break 1846 } 1847 } 1848 p.clearActiveFormattingElements() 1849 p.templateStack.pop() 1850 p.resetInsertionMode() 1851 return false 1852 } 1853 return false 1854} 1855 1856// Section 12.2.6.4.19. 1857func afterBodyIM(p *parser) bool { 1858 switch p.tok.Type { 1859 case ErrorToken: 1860 // Stop parsing. 1861 return true 1862 case TextToken: 1863 s := strings.TrimLeft(p.tok.Data, whitespace) 1864 if len(s) == 0 { 1865 // It was all whitespace. 1866 return inBodyIM(p) 1867 } 1868 case StartTagToken: 1869 if p.tok.DataAtom == a.Html { 1870 return inBodyIM(p) 1871 } 1872 case EndTagToken: 1873 if p.tok.DataAtom == a.Html { 1874 if !p.fragment { 1875 p.im = afterAfterBodyIM 1876 } 1877 return true 1878 } 1879 case CommentToken: 1880 // The comment is attached to the <html> element. 1881 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html { 1882 panic("html: bad parser state: <html> element not found, in the after-body insertion mode") 1883 } 1884 p.oe[0].AppendChild(&Node{ 1885 Type: CommentNode, 1886 Data: p.tok.Data, 1887 }) 1888 return true 1889 } 1890 p.im = inBodyIM 1891 return false 1892} 1893 1894// Section 12.2.6.4.20. 1895func inFramesetIM(p *parser) bool { 1896 switch p.tok.Type { 1897 case CommentToken: 1898 p.addChild(&Node{ 1899 Type: CommentNode, 1900 Data: p.tok.Data, 1901 }) 1902 case TextToken: 1903 // Ignore all text but whitespace. 1904 s := strings.Map(func(c rune) rune { 1905 switch c { 1906 case ' ', '\t', '\n', '\f', '\r': 1907 return c 1908 } 1909 return -1 1910 }, p.tok.Data) 1911 if s != "" { 1912 p.addText(s) 1913 } 1914 case StartTagToken: 1915 switch p.tok.DataAtom { 1916 case a.Html: 1917 return inBodyIM(p) 1918 case a.Frameset: 1919 p.addElement() 1920 case a.Frame: 1921 p.addElement() 1922 p.oe.pop() 1923 p.acknowledgeSelfClosingTag() 1924 case a.Noframes: 1925 return inHeadIM(p) 1926 } 1927 case EndTagToken: 1928 switch p.tok.DataAtom { 1929 case a.Frameset: 1930 if p.oe.top().DataAtom != a.Html { 1931 p.oe.pop() 1932 if p.oe.top().DataAtom != a.Frameset { 1933 p.im = afterFramesetIM 1934 return true 1935 } 1936 } 1937 } 1938 default: 1939 // Ignore the token. 1940 } 1941 return true 1942} 1943 1944// Section 12.2.6.4.21. 1945func afterFramesetIM(p *parser) bool { 1946 switch p.tok.Type { 1947 case CommentToken: 1948 p.addChild(&Node{ 1949 Type: CommentNode, 1950 Data: p.tok.Data, 1951 }) 1952 case TextToken: 1953 // Ignore all text but whitespace. 1954 s := strings.Map(func(c rune) rune { 1955 switch c { 1956 case ' ', '\t', '\n', '\f', '\r': 1957 return c 1958 } 1959 return -1 1960 }, p.tok.Data) 1961 if s != "" { 1962 p.addText(s) 1963 } 1964 case StartTagToken: 1965 switch p.tok.DataAtom { 1966 case a.Html: 1967 return inBodyIM(p) 1968 case a.Noframes: 1969 return inHeadIM(p) 1970 } 1971 case EndTagToken: 1972 switch p.tok.DataAtom { 1973 case a.Html: 1974 p.im = afterAfterFramesetIM 1975 return true 1976 } 1977 default: 1978 // Ignore the token. 1979 } 1980 return true 1981} 1982 1983// Section 12.2.6.4.22. 1984func afterAfterBodyIM(p *parser) bool { 1985 switch p.tok.Type { 1986 case ErrorToken: 1987 // Stop parsing. 1988 return true 1989 case TextToken: 1990 s := strings.TrimLeft(p.tok.Data, whitespace) 1991 if len(s) == 0 { 1992 // It was all whitespace. 1993 return inBodyIM(p) 1994 } 1995 case StartTagToken: 1996 if p.tok.DataAtom == a.Html { 1997 return inBodyIM(p) 1998 } 1999 case CommentToken: 2000 p.doc.AppendChild(&Node{ 2001 Type: CommentNode, 2002 Data: p.tok.Data, 2003 }) 2004 return true 2005 case DoctypeToken: 2006 return inBodyIM(p) 2007 } 2008 p.im = inBodyIM 2009 return false 2010} 2011 2012// Section 12.2.6.4.23. 2013func afterAfterFramesetIM(p *parser) bool { 2014 switch p.tok.Type { 2015 case CommentToken: 2016 p.doc.AppendChild(&Node{ 2017 Type: CommentNode, 2018 Data: p.tok.Data, 2019 }) 2020 case TextToken: 2021 // Ignore all text but whitespace. 2022 s := strings.Map(func(c rune) rune { 2023 switch c { 2024 case ' ', '\t', '\n', '\f', '\r': 2025 return c 2026 } 2027 return -1 2028 }, p.tok.Data) 2029 if s != "" { 2030 p.tok.Data = s 2031 return inBodyIM(p) 2032 } 2033 case StartTagToken: 2034 switch p.tok.DataAtom { 2035 case a.Html: 2036 return inBodyIM(p) 2037 case a.Noframes: 2038 return inHeadIM(p) 2039 } 2040 case DoctypeToken: 2041 return inBodyIM(p) 2042 default: 2043 // Ignore the token. 2044 } 2045 return true 2046} 2047 2048const whitespaceOrNUL = whitespace + "\x00" 2049 2050// Section 12.2.6.5 2051func parseForeignContent(p *parser) bool { 2052 switch p.tok.Type { 2053 case TextToken: 2054 if p.framesetOK { 2055 p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == "" 2056 } 2057 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1) 2058 p.addText(p.tok.Data) 2059 case CommentToken: 2060 p.addChild(&Node{ 2061 Type: CommentNode, 2062 Data: p.tok.Data, 2063 }) 2064 case StartTagToken: 2065 b := breakout[p.tok.Data] 2066 if p.tok.DataAtom == a.Font { 2067 loop: 2068 for _, attr := range p.tok.Attr { 2069 switch attr.Key { 2070 case "color", "face", "size": 2071 b = true 2072 break loop 2073 } 2074 } 2075 } 2076 if b { 2077 for i := len(p.oe) - 1; i >= 0; i-- { 2078 n := p.oe[i] 2079 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) { 2080 p.oe = p.oe[:i+1] 2081 break 2082 } 2083 } 2084 return false 2085 } 2086 switch p.top().Namespace { 2087 case "math": 2088 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) 2089 case "svg": 2090 // Adjust SVG tag names. The tokenizer lower-cases tag names, but 2091 // SVG wants e.g. "foreignObject" with a capital second "O". 2092 if x := svgTagNameAdjustments[p.tok.Data]; x != "" { 2093 p.tok.DataAtom = a.Lookup([]byte(x)) 2094 p.tok.Data = x 2095 } 2096 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) 2097 default: 2098 panic("html: bad parser state: unexpected namespace") 2099 } 2100 adjustForeignAttributes(p.tok.Attr) 2101 namespace := p.top().Namespace 2102 p.addElement() 2103 p.top().Namespace = namespace 2104 if namespace != "" { 2105 // Don't let the tokenizer go into raw text mode in foreign content 2106 // (e.g. in an SVG <title> tag). 2107 p.tokenizer.NextIsNotRawText() 2108 } 2109 if p.hasSelfClosingToken { 2110 p.oe.pop() 2111 p.acknowledgeSelfClosingTag() 2112 } 2113 case EndTagToken: 2114 for i := len(p.oe) - 1; i >= 0; i-- { 2115 if p.oe[i].Namespace == "" { 2116 return p.im(p) 2117 } 2118 if strings.EqualFold(p.oe[i].Data, p.tok.Data) { 2119 p.oe = p.oe[:i] 2120 break 2121 } 2122 } 2123 return true 2124 default: 2125 // Ignore the token. 2126 } 2127 return true 2128} 2129 2130// Section 12.2.6. 2131func (p *parser) inForeignContent() bool { 2132 if len(p.oe) == 0 { 2133 return false 2134 } 2135 n := p.oe[len(p.oe)-1] 2136 if n.Namespace == "" { 2137 return false 2138 } 2139 if mathMLTextIntegrationPoint(n) { 2140 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark { 2141 return false 2142 } 2143 if p.tok.Type == TextToken { 2144 return false 2145 } 2146 } 2147 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg { 2148 return false 2149 } 2150 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) { 2151 return false 2152 } 2153 if p.tok.Type == ErrorToken { 2154 return false 2155 } 2156 return true 2157} 2158 2159// parseImpliedToken parses a token as though it had appeared in the parser's 2160// input. 2161func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) { 2162 realToken, selfClosing := p.tok, p.hasSelfClosingToken 2163 p.tok = Token{ 2164 Type: t, 2165 DataAtom: dataAtom, 2166 Data: data, 2167 } 2168 p.hasSelfClosingToken = false 2169 p.parseCurrentToken() 2170 p.tok, p.hasSelfClosingToken = realToken, selfClosing 2171} 2172 2173// parseCurrentToken runs the current token through the parsing routines 2174// until it is consumed. 2175func (p *parser) parseCurrentToken() { 2176 if p.tok.Type == SelfClosingTagToken { 2177 p.hasSelfClosingToken = true 2178 p.tok.Type = StartTagToken 2179 } 2180 2181 consumed := false 2182 for !consumed { 2183 if p.inForeignContent() { 2184 consumed = parseForeignContent(p) 2185 } else { 2186 consumed = p.im(p) 2187 } 2188 } 2189 2190 if p.hasSelfClosingToken { 2191 // This is a parse error, but ignore it. 2192 p.hasSelfClosingToken = false 2193 } 2194} 2195 2196func (p *parser) parse() error { 2197 // Iterate until EOF. Any other error will cause an early return. 2198 var err error 2199 for err != io.EOF { 2200 // CDATA sections are allowed only in foreign content. 2201 n := p.oe.top() 2202 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "") 2203 // Read and parse the next token. 2204 p.tokenizer.Next() 2205 p.tok = p.tokenizer.Token() 2206 if p.tok.Type == ErrorToken { 2207 err = p.tokenizer.Err() 2208 if err != nil && err != io.EOF { 2209 return err 2210 } 2211 } 2212 p.parseCurrentToken() 2213 } 2214 return nil 2215} 2216 2217// Parse returns the parse tree for the HTML from the given Reader. 2218// 2219// It implements the HTML5 parsing algorithm 2220// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction), 2221// which is very complicated. The resultant tree can contain implicitly created 2222// nodes that have no explicit <tag> listed in r's data, and nodes' parents can 2223// differ from the nesting implied by a naive processing of start and end 2224// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped, 2225// with no corresponding node in the resulting tree. 2226// 2227// The input is assumed to be UTF-8 encoded. 2228func Parse(r io.Reader) (*Node, error) { 2229 p := &parser{ 2230 tokenizer: NewTokenizer(r), 2231 doc: &Node{ 2232 Type: DocumentNode, 2233 }, 2234 scripting: true, 2235 framesetOK: true, 2236 im: initialIM, 2237 } 2238 err := p.parse() 2239 if err != nil { 2240 return nil, err 2241 } 2242 return p.doc, nil 2243} 2244 2245// ParseFragment parses a fragment of HTML and returns the nodes that were 2246// found. If the fragment is the InnerHTML for an existing element, pass that 2247// element in context. 2248// 2249// It has the same intricacies as Parse. 2250func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { 2251 contextTag := "" 2252 if context != nil { 2253 if context.Type != ElementNode { 2254 return nil, errors.New("html: ParseFragment of non-element Node") 2255 } 2256 // The next check isn't just context.DataAtom.String() == context.Data because 2257 // it is valid to pass an element whose tag isn't a known atom. For example, 2258 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent. 2259 if context.DataAtom != a.Lookup([]byte(context.Data)) { 2260 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data) 2261 } 2262 contextTag = context.DataAtom.String() 2263 } 2264 p := &parser{ 2265 tokenizer: NewTokenizerFragment(r, contextTag), 2266 doc: &Node{ 2267 Type: DocumentNode, 2268 }, 2269 scripting: true, 2270 fragment: true, 2271 context: context, 2272 } 2273 2274 root := &Node{ 2275 Type: ElementNode, 2276 DataAtom: a.Html, 2277 Data: a.Html.String(), 2278 } 2279 p.doc.AppendChild(root) 2280 p.oe = nodeStack{root} 2281 if context != nil && context.DataAtom == a.Template { 2282 p.templateStack = append(p.templateStack, inTemplateIM) 2283 } 2284 p.resetInsertionMode() 2285 2286 for n := context; n != nil; n = n.Parent { 2287 if n.Type == ElementNode && n.DataAtom == a.Form { 2288 p.form = n 2289 break 2290 } 2291 } 2292 2293 err := p.parse() 2294 if err != nil { 2295 return nil, err 2296 } 2297 2298 parent := p.doc 2299 if context != nil { 2300 parent = root 2301 } 2302 2303 var result []*Node 2304 for c := parent.FirstChild; c != nil; { 2305 next := c.NextSibling 2306 parent.RemoveChild(c) 2307 result = append(result, c) 2308 c = next 2309 } 2310 return result, nil 2311} 2312