1package goquery 2 3import ( 4 "strings" 5 6 "golang.org/x/net/html" 7) 8 9// After applies the selector from the root document and inserts the matched elements 10// after the elements in the set of matched elements. 11// 12// If one of the matched elements in the selection is not currently in the 13// document, it's impossible to insert nodes after it, so it will be ignored. 14// 15// This follows the same rules as Selection.Append. 16func (s *Selection) After(selector string) *Selection { 17 return s.AfterMatcher(compileMatcher(selector)) 18} 19 20// AfterMatcher applies the matcher from the root document and inserts the matched elements 21// after the elements in the set of matched elements. 22// 23// If one of the matched elements in the selection is not currently in the 24// document, it's impossible to insert nodes after it, so it will be ignored. 25// 26// This follows the same rules as Selection.Append. 27func (s *Selection) AfterMatcher(m Matcher) *Selection { 28 return s.AfterNodes(m.MatchAll(s.document.rootNode)...) 29} 30 31// AfterSelection inserts the elements in the selection after each element in the set of matched 32// elements. 33// 34// This follows the same rules as Selection.Append. 35func (s *Selection) AfterSelection(sel *Selection) *Selection { 36 return s.AfterNodes(sel.Nodes...) 37} 38 39// AfterHtml parses the html and inserts it after the set of matched elements. 40// 41// This follows the same rules as Selection.Append. 42func (s *Selection) AfterHtml(htmlStr string) *Selection { 43 return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { 44 nextSibling := node.NextSibling 45 for _, n := range nodes { 46 if node.Parent != nil { 47 node.Parent.InsertBefore(n, nextSibling) 48 } 49 } 50 }) 51} 52 53// AfterNodes inserts the nodes after each element in the set of matched elements. 54// 55// This follows the same rules as Selection.Append. 56func (s *Selection) AfterNodes(ns ...*html.Node) *Selection { 57 return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) { 58 if sn.Parent != nil { 59 sn.Parent.InsertBefore(n, sn.NextSibling) 60 } 61 }) 62} 63 64// Append appends the elements specified by the selector to the end of each element 65// in the set of matched elements, following those rules: 66// 67// 1) The selector is applied to the root document. 68// 69// 2) Elements that are part of the document will be moved to the new location. 70// 71// 3) If there are multiple locations to append to, cloned nodes will be 72// appended to all target locations except the last one, which will be moved 73// as noted in (2). 74func (s *Selection) Append(selector string) *Selection { 75 return s.AppendMatcher(compileMatcher(selector)) 76} 77 78// AppendMatcher appends the elements specified by the matcher to the end of each element 79// in the set of matched elements. 80// 81// This follows the same rules as Selection.Append. 82func (s *Selection) AppendMatcher(m Matcher) *Selection { 83 return s.AppendNodes(m.MatchAll(s.document.rootNode)...) 84} 85 86// AppendSelection appends the elements in the selection to the end of each element 87// in the set of matched elements. 88// 89// This follows the same rules as Selection.Append. 90func (s *Selection) AppendSelection(sel *Selection) *Selection { 91 return s.AppendNodes(sel.Nodes...) 92} 93 94// AppendHtml parses the html and appends it to the set of matched elements. 95func (s *Selection) AppendHtml(htmlStr string) *Selection { 96 return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { 97 for _, n := range nodes { 98 node.AppendChild(n) 99 } 100 }) 101} 102 103// AppendNodes appends the specified nodes to each node in the set of matched elements. 104// 105// This follows the same rules as Selection.Append. 106func (s *Selection) AppendNodes(ns ...*html.Node) *Selection { 107 return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) { 108 sn.AppendChild(n) 109 }) 110} 111 112// Before inserts the matched elements before each element in the set of matched elements. 113// 114// This follows the same rules as Selection.Append. 115func (s *Selection) Before(selector string) *Selection { 116 return s.BeforeMatcher(compileMatcher(selector)) 117} 118 119// BeforeMatcher inserts the matched elements before each element in the set of matched elements. 120// 121// This follows the same rules as Selection.Append. 122func (s *Selection) BeforeMatcher(m Matcher) *Selection { 123 return s.BeforeNodes(m.MatchAll(s.document.rootNode)...) 124} 125 126// BeforeSelection inserts the elements in the selection before each element in the set of matched 127// elements. 128// 129// This follows the same rules as Selection.Append. 130func (s *Selection) BeforeSelection(sel *Selection) *Selection { 131 return s.BeforeNodes(sel.Nodes...) 132} 133 134// BeforeHtml parses the html and inserts it before the set of matched elements. 135// 136// This follows the same rules as Selection.Append. 137func (s *Selection) BeforeHtml(htmlStr string) *Selection { 138 return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { 139 for _, n := range nodes { 140 if node.Parent != nil { 141 node.Parent.InsertBefore(n, node) 142 } 143 } 144 }) 145} 146 147// BeforeNodes inserts the nodes before each element in the set of matched elements. 148// 149// This follows the same rules as Selection.Append. 150func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection { 151 return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) { 152 if sn.Parent != nil { 153 sn.Parent.InsertBefore(n, sn) 154 } 155 }) 156} 157 158// Clone creates a deep copy of the set of matched nodes. The new nodes will not be 159// attached to the document. 160func (s *Selection) Clone() *Selection { 161 ns := newEmptySelection(s.document) 162 ns.Nodes = cloneNodes(s.Nodes) 163 return ns 164} 165 166// Empty removes all children nodes from the set of matched elements. 167// It returns the children nodes in a new Selection. 168func (s *Selection) Empty() *Selection { 169 var nodes []*html.Node 170 171 for _, n := range s.Nodes { 172 for c := n.FirstChild; c != nil; c = n.FirstChild { 173 n.RemoveChild(c) 174 nodes = append(nodes, c) 175 } 176 } 177 178 return pushStack(s, nodes) 179} 180 181// Prepend prepends the elements specified by the selector to each element in 182// the set of matched elements, following the same rules as Append. 183func (s *Selection) Prepend(selector string) *Selection { 184 return s.PrependMatcher(compileMatcher(selector)) 185} 186 187// PrependMatcher prepends the elements specified by the matcher to each 188// element in the set of matched elements. 189// 190// This follows the same rules as Selection.Append. 191func (s *Selection) PrependMatcher(m Matcher) *Selection { 192 return s.PrependNodes(m.MatchAll(s.document.rootNode)...) 193} 194 195// PrependSelection prepends the elements in the selection to each element in 196// the set of matched elements. 197// 198// This follows the same rules as Selection.Append. 199func (s *Selection) PrependSelection(sel *Selection) *Selection { 200 return s.PrependNodes(sel.Nodes...) 201} 202 203// PrependHtml parses the html and prepends it to the set of matched elements. 204func (s *Selection) PrependHtml(htmlStr string) *Selection { 205 return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { 206 firstChild := node.FirstChild 207 for _, n := range nodes { 208 node.InsertBefore(n, firstChild) 209 } 210 }) 211} 212 213// PrependNodes prepends the specified nodes to each node in the set of 214// matched elements. 215// 216// This follows the same rules as Selection.Append. 217func (s *Selection) PrependNodes(ns ...*html.Node) *Selection { 218 return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) { 219 // sn.FirstChild may be nil, in which case this functions like 220 // sn.AppendChild() 221 sn.InsertBefore(n, sn.FirstChild) 222 }) 223} 224 225// Remove removes the set of matched elements from the document. 226// It returns the same selection, now consisting of nodes not in the document. 227func (s *Selection) Remove() *Selection { 228 for _, n := range s.Nodes { 229 if n.Parent != nil { 230 n.Parent.RemoveChild(n) 231 } 232 } 233 234 return s 235} 236 237// RemoveFiltered removes from the current set of matched elements those that 238// match the selector filter. It returns the Selection of removed nodes. 239// 240// For example if the selection s contains "<h1>", "<h2>" and "<h3>" 241// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed 242// (and returned), while "<h1>" and "<h3>" are kept in the document. 243func (s *Selection) RemoveFiltered(selector string) *Selection { 244 return s.RemoveMatcher(compileMatcher(selector)) 245} 246 247// RemoveMatcher removes from the current set of matched elements those that 248// match the Matcher filter. It returns the Selection of removed nodes. 249// See RemoveFiltered for additional information. 250func (s *Selection) RemoveMatcher(m Matcher) *Selection { 251 return s.FilterMatcher(m).Remove() 252} 253 254// ReplaceWith replaces each element in the set of matched elements with the 255// nodes matched by the given selector. 256// It returns the removed elements. 257// 258// This follows the same rules as Selection.Append. 259func (s *Selection) ReplaceWith(selector string) *Selection { 260 return s.ReplaceWithMatcher(compileMatcher(selector)) 261} 262 263// ReplaceWithMatcher replaces each element in the set of matched elements with 264// the nodes matched by the given Matcher. 265// It returns the removed elements. 266// 267// This follows the same rules as Selection.Append. 268func (s *Selection) ReplaceWithMatcher(m Matcher) *Selection { 269 return s.ReplaceWithNodes(m.MatchAll(s.document.rootNode)...) 270} 271 272// ReplaceWithSelection replaces each element in the set of matched elements with 273// the nodes from the given Selection. 274// It returns the removed elements. 275// 276// This follows the same rules as Selection.Append. 277func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection { 278 return s.ReplaceWithNodes(sel.Nodes...) 279} 280 281// ReplaceWithHtml replaces each element in the set of matched elements with 282// the parsed HTML. 283// It returns the removed elements. 284// 285// This follows the same rules as Selection.Append. 286func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection { 287 s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { 288 nextSibling := node.NextSibling 289 for _, n := range nodes { 290 if node.Parent != nil { 291 node.Parent.InsertBefore(n, nextSibling) 292 } 293 } 294 }) 295 return s.Remove() 296} 297 298// ReplaceWithNodes replaces each element in the set of matched elements with 299// the given nodes. 300// It returns the removed elements. 301// 302// This follows the same rules as Selection.Append. 303func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { 304 s.AfterNodes(ns...) 305 return s.Remove() 306} 307 308// SetHtml sets the html content of each element in the selection to 309// specified html string. 310func (s *Selection) SetHtml(htmlStr string) *Selection { 311 for _, context := range s.Nodes { 312 for c := context.FirstChild; c != nil; c = context.FirstChild { 313 context.RemoveChild(c) 314 } 315 } 316 return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { 317 for _, n := range nodes { 318 node.AppendChild(n) 319 } 320 }) 321} 322 323// SetText sets the content of each element in the selection to specified content. 324// The provided text string is escaped. 325func (s *Selection) SetText(text string) *Selection { 326 return s.SetHtml(html.EscapeString(text)) 327} 328 329// Unwrap removes the parents of the set of matched elements, leaving the matched 330// elements (and their siblings, if any) in their place. 331// It returns the original selection. 332func (s *Selection) Unwrap() *Selection { 333 s.Parent().Each(func(i int, ss *Selection) { 334 // For some reason, jquery allows unwrap to remove the <head> element, so 335 // allowing it here too. Same for <html>. Why it allows those elements to 336 // be unwrapped while not allowing body is a mystery to me. 337 if ss.Nodes[0].Data != "body" { 338 ss.ReplaceWithSelection(ss.Contents()) 339 } 340 }) 341 342 return s 343} 344 345// Wrap wraps each element in the set of matched elements inside the first 346// element matched by the given selector. The matched child is cloned before 347// being inserted into the document. 348// 349// It returns the original set of elements. 350func (s *Selection) Wrap(selector string) *Selection { 351 return s.WrapMatcher(compileMatcher(selector)) 352} 353 354// WrapMatcher wraps each element in the set of matched elements inside the 355// first element matched by the given matcher. The matched child is cloned 356// before being inserted into the document. 357// 358// It returns the original set of elements. 359func (s *Selection) WrapMatcher(m Matcher) *Selection { 360 return s.wrapNodes(m.MatchAll(s.document.rootNode)...) 361} 362 363// WrapSelection wraps each element in the set of matched elements inside the 364// first element in the given Selection. The element is cloned before being 365// inserted into the document. 366// 367// It returns the original set of elements. 368func (s *Selection) WrapSelection(sel *Selection) *Selection { 369 return s.wrapNodes(sel.Nodes...) 370} 371 372// WrapHtml wraps each element in the set of matched elements inside the inner- 373// most child of the given HTML. 374// 375// It returns the original set of elements. 376func (s *Selection) WrapHtml(htmlStr string) *Selection { 377 nodesMap := make(map[string][]*html.Node) 378 for _, context := range s.Nodes { 379 var parent *html.Node 380 if context.Parent != nil { 381 parent = context.Parent 382 } else { 383 parent = &html.Node{Type: html.ElementNode} 384 } 385 nodes, found := nodesMap[nodeName(parent)] 386 if !found { 387 nodes = parseHtmlWithContext(htmlStr, parent) 388 nodesMap[nodeName(parent)] = nodes 389 } 390 newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...) 391 } 392 return s 393} 394 395// WrapNode wraps each element in the set of matched elements inside the inner- 396// most child of the given node. The given node is copied before being inserted 397// into the document. 398// 399// It returns the original set of elements. 400func (s *Selection) WrapNode(n *html.Node) *Selection { 401 return s.wrapNodes(n) 402} 403 404func (s *Selection) wrapNodes(ns ...*html.Node) *Selection { 405 s.Each(func(i int, ss *Selection) { 406 ss.wrapAllNodes(ns...) 407 }) 408 409 return s 410} 411 412// WrapAll wraps a single HTML structure, matched by the given selector, around 413// all elements in the set of matched elements. The matched child is cloned 414// before being inserted into the document. 415// 416// It returns the original set of elements. 417func (s *Selection) WrapAll(selector string) *Selection { 418 return s.WrapAllMatcher(compileMatcher(selector)) 419} 420 421// WrapAllMatcher wraps a single HTML structure, matched by the given Matcher, 422// around all elements in the set of matched elements. The matched child is 423// cloned before being inserted into the document. 424// 425// It returns the original set of elements. 426func (s *Selection) WrapAllMatcher(m Matcher) *Selection { 427 return s.wrapAllNodes(m.MatchAll(s.document.rootNode)...) 428} 429 430// WrapAllSelection wraps a single HTML structure, the first node of the given 431// Selection, around all elements in the set of matched elements. The matched 432// child is cloned before being inserted into the document. 433// 434// It returns the original set of elements. 435func (s *Selection) WrapAllSelection(sel *Selection) *Selection { 436 return s.wrapAllNodes(sel.Nodes...) 437} 438 439// WrapAllHtml wraps the given HTML structure around all elements in the set of 440// matched elements. The matched child is cloned before being inserted into the 441// document. 442// 443// It returns the original set of elements. 444func (s *Selection) WrapAllHtml(htmlStr string) *Selection { 445 var context *html.Node 446 var nodes []*html.Node 447 if len(s.Nodes) > 0 { 448 context = s.Nodes[0] 449 if context.Parent != nil { 450 nodes = parseHtmlWithContext(htmlStr, context) 451 } else { 452 nodes = parseHtml(htmlStr) 453 } 454 } 455 return s.wrapAllNodes(nodes...) 456} 457 458func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection { 459 if len(ns) > 0 { 460 return s.WrapAllNode(ns[0]) 461 } 462 return s 463} 464 465// WrapAllNode wraps the given node around the first element in the Selection, 466// making all other nodes in the Selection children of the given node. The node 467// is cloned before being inserted into the document. 468// 469// It returns the original set of elements. 470func (s *Selection) WrapAllNode(n *html.Node) *Selection { 471 if s.Size() == 0 { 472 return s 473 } 474 475 wrap := cloneNode(n) 476 477 first := s.Nodes[0] 478 if first.Parent != nil { 479 first.Parent.InsertBefore(wrap, first) 480 first.Parent.RemoveChild(first) 481 } 482 483 for c := getFirstChildEl(wrap); c != nil; c = getFirstChildEl(wrap) { 484 wrap = c 485 } 486 487 newSingleSelection(wrap, s.document).AppendSelection(s) 488 489 return s 490} 491 492// WrapInner wraps an HTML structure, matched by the given selector, around the 493// content of element in the set of matched elements. The matched child is 494// cloned before being inserted into the document. 495// 496// It returns the original set of elements. 497func (s *Selection) WrapInner(selector string) *Selection { 498 return s.WrapInnerMatcher(compileMatcher(selector)) 499} 500 501// WrapInnerMatcher wraps an HTML structure, matched by the given selector, 502// around the content of element in the set of matched elements. The matched 503// child is cloned before being inserted into the document. 504// 505// It returns the original set of elements. 506func (s *Selection) WrapInnerMatcher(m Matcher) *Selection { 507 return s.wrapInnerNodes(m.MatchAll(s.document.rootNode)...) 508} 509 510// WrapInnerSelection wraps an HTML structure, matched by the given selector, 511// around the content of element in the set of matched elements. The matched 512// child is cloned before being inserted into the document. 513// 514// It returns the original set of elements. 515func (s *Selection) WrapInnerSelection(sel *Selection) *Selection { 516 return s.wrapInnerNodes(sel.Nodes...) 517} 518 519// WrapInnerHtml wraps an HTML structure, matched by the given selector, around 520// the content of element in the set of matched elements. The matched child is 521// cloned before being inserted into the document. 522// 523// It returns the original set of elements. 524func (s *Selection) WrapInnerHtml(htmlStr string) *Selection { 525 nodesMap := make(map[string][]*html.Node) 526 for _, context := range s.Nodes { 527 nodes, found := nodesMap[nodeName(context)] 528 if !found { 529 nodes = parseHtmlWithContext(htmlStr, context) 530 nodesMap[nodeName(context)] = nodes 531 } 532 newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...) 533 } 534 return s 535} 536 537// WrapInnerNode wraps an HTML structure, matched by the given selector, around 538// the content of element in the set of matched elements. The matched child is 539// cloned before being inserted into the document. 540// 541// It returns the original set of elements. 542func (s *Selection) WrapInnerNode(n *html.Node) *Selection { 543 return s.wrapInnerNodes(n) 544} 545 546func (s *Selection) wrapInnerNodes(ns ...*html.Node) *Selection { 547 if len(ns) == 0 { 548 return s 549 } 550 551 s.Each(func(i int, s *Selection) { 552 contents := s.Contents() 553 554 if contents.Size() > 0 { 555 contents.wrapAllNodes(ns...) 556 } else { 557 s.AppendNodes(cloneNode(ns[0])) 558 } 559 }) 560 561 return s 562} 563 564func parseHtml(h string) []*html.Node { 565 // Errors are only returned when the io.Reader returns any error besides 566 // EOF, but strings.Reader never will 567 nodes, err := html.ParseFragment(strings.NewReader(h), &html.Node{Type: html.ElementNode}) 568 if err != nil { 569 panic("goquery: failed to parse HTML: " + err.Error()) 570 } 571 return nodes 572} 573 574func parseHtmlWithContext(h string, context *html.Node) []*html.Node { 575 // Errors are only returned when the io.Reader returns any error besides 576 // EOF, but strings.Reader never will 577 nodes, err := html.ParseFragment(strings.NewReader(h), context) 578 if err != nil { 579 panic("goquery: failed to parse HTML: " + err.Error()) 580 } 581 return nodes 582} 583 584// Get the first child that is an ElementNode 585func getFirstChildEl(n *html.Node) *html.Node { 586 c := n.FirstChild 587 for c != nil && c.Type != html.ElementNode { 588 c = c.NextSibling 589 } 590 return c 591} 592 593// Deep copy a slice of nodes. 594func cloneNodes(ns []*html.Node) []*html.Node { 595 cns := make([]*html.Node, 0, len(ns)) 596 597 for _, n := range ns { 598 cns = append(cns, cloneNode(n)) 599 } 600 601 return cns 602} 603 604// Deep copy a node. The new node has clones of all the original node's 605// children but none of its parents or siblings. 606func cloneNode(n *html.Node) *html.Node { 607 nn := &html.Node{ 608 Type: n.Type, 609 DataAtom: n.DataAtom, 610 Data: n.Data, 611 Attr: make([]html.Attribute, len(n.Attr)), 612 } 613 614 copy(nn.Attr, n.Attr) 615 for c := n.FirstChild; c != nil; c = c.NextSibling { 616 nn.AppendChild(cloneNode(c)) 617 } 618 619 return nn 620} 621 622func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool, 623 f func(sn *html.Node, n *html.Node)) *Selection { 624 625 lasti := s.Size() - 1 626 627 // net.Html doesn't provide document fragments for insertion, so to get 628 // things in the correct order with After() and Prepend(), the callback 629 // needs to be called on the reverse of the nodes. 630 if reverse { 631 for i, j := 0, len(ns)-1; i < j; i, j = i+1, j-1 { 632 ns[i], ns[j] = ns[j], ns[i] 633 } 634 } 635 636 for i, sn := range s.Nodes { 637 for _, n := range ns { 638 if i != lasti { 639 f(sn, cloneNode(n)) 640 } else { 641 if n.Parent != nil { 642 n.Parent.RemoveChild(n) 643 } 644 f(sn, n) 645 } 646 } 647 } 648 649 return s 650} 651 652// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn. 653// The parsed nodes are inserted for each element of the selection. 654// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html. 655// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context. 656func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection { 657 // cache to avoid parsing the html for the same context multiple times 658 nodeCache := make(map[string][]*html.Node) 659 var context *html.Node 660 for _, n := range s.Nodes { 661 if isParent { 662 context = n.Parent 663 } else { 664 if n.Type != html.ElementNode { 665 continue 666 } 667 context = n 668 } 669 if context != nil { 670 nodes, found := nodeCache[nodeName(context)] 671 if !found { 672 nodes = parseHtmlWithContext(htmlStr, context) 673 nodeCache[nodeName(context)] = nodes 674 } 675 mergeFn(n, cloneNodes(nodes)) 676 } 677 } 678 return s 679} 680