1package goquery 2 3import ( 4 "errors" 5 "io" 6 "net/http" 7 "net/url" 8 9 "github.com/andybalholm/cascadia" 10 11 "golang.org/x/net/html" 12) 13 14// Document represents an HTML document to be manipulated. Unlike jQuery, which 15// is loaded as part of a DOM document, and thus acts upon its containing 16// document, GoQuery doesn't know which HTML document to act upon. So it needs 17// to be told, and that's what the Document class is for. It holds the root 18// document node to manipulate, and can make selections on this document. 19type Document struct { 20 *Selection 21 Url *url.URL 22 rootNode *html.Node 23} 24 25// NewDocumentFromNode is a Document constructor that takes a root html Node 26// as argument. 27func NewDocumentFromNode(root *html.Node) *Document { 28 return newDocument(root, nil) 29} 30 31// NewDocument is a Document constructor that takes a string URL as argument. 32// It loads the specified document, parses it, and stores the root Document 33// node, ready to be manipulated. 34// 35// Deprecated: Use the net/http standard library package to make the request 36// and validate the response before calling goquery.NewDocumentFromReader 37// with the response's body. 38func NewDocument(url string) (*Document, error) { 39 // Load the URL 40 res, e := http.Get(url) 41 if e != nil { 42 return nil, e 43 } 44 return NewDocumentFromResponse(res) 45} 46 47// NewDocumentFromReader returns a Document from an io.Reader. 48// It returns an error as second value if the reader's data cannot be parsed 49// as html. It does not check if the reader is also an io.Closer, the 50// provided reader is never closed by this call. It is the responsibility 51// of the caller to close it if required. 52func NewDocumentFromReader(r io.Reader) (*Document, error) { 53 root, e := html.Parse(r) 54 if e != nil { 55 return nil, e 56 } 57 return newDocument(root, nil), nil 58} 59 60// NewDocumentFromResponse is another Document constructor that takes an http response as argument. 61// It loads the specified response's document, parses it, and stores the root Document 62// node, ready to be manipulated. The response's body is closed on return. 63// 64// Deprecated: Use goquery.NewDocumentFromReader with the response's body. 65func NewDocumentFromResponse(res *http.Response) (*Document, error) { 66 if res == nil { 67 return nil, errors.New("Response is nil") 68 } 69 defer res.Body.Close() 70 if res.Request == nil { 71 return nil, errors.New("Response.Request is nil") 72 } 73 74 // Parse the HTML into nodes 75 root, e := html.Parse(res.Body) 76 if e != nil { 77 return nil, e 78 } 79 80 // Create and fill the document 81 return newDocument(root, res.Request.URL), nil 82} 83 84// CloneDocument creates a deep-clone of a document. 85func CloneDocument(doc *Document) *Document { 86 return newDocument(cloneNode(doc.rootNode), doc.Url) 87} 88 89// Private constructor, make sure all fields are correctly filled. 90func newDocument(root *html.Node, url *url.URL) *Document { 91 // Create and fill the document 92 d := &Document{nil, url, root} 93 d.Selection = newSingleSelection(root, d) 94 return d 95} 96 97// Selection represents a collection of nodes matching some criteria. The 98// initial Selection can be created by using Document.Find, and then 99// manipulated using the jQuery-like chainable syntax and methods. 100type Selection struct { 101 Nodes []*html.Node 102 document *Document 103 prevSel *Selection 104} 105 106// Helper constructor to create an empty selection 107func newEmptySelection(doc *Document) *Selection { 108 return &Selection{nil, doc, nil} 109} 110 111// Helper constructor to create a selection of only one node 112func newSingleSelection(node *html.Node, doc *Document) *Selection { 113 return &Selection{[]*html.Node{node}, doc, nil} 114} 115 116// Matcher is an interface that defines the methods to match 117// HTML nodes against a compiled selector string. Cascadia's 118// Selector implements this interface. 119type Matcher interface { 120 Match(*html.Node) bool 121 MatchAll(*html.Node) []*html.Node 122 Filter([]*html.Node) []*html.Node 123} 124 125// compileMatcher compiles the selector string s and returns 126// the corresponding Matcher. If s is an invalid selector string, 127// it returns a Matcher that fails all matches. 128func compileMatcher(s string) Matcher { 129 cs, err := cascadia.Compile(s) 130 if err != nil { 131 return invalidMatcher{} 132 } 133 return cs 134} 135 136// invalidMatcher is a Matcher that always fails to match. 137type invalidMatcher struct{} 138 139func (invalidMatcher) Match(n *html.Node) bool { return false } 140func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil } 141func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil } 142