1package goquery
2
3import (
4	"errors"
5	"io"
6	"net/http"
7	"net/url"
8
9	"github.com/andybalholm/cascadia"
10
11	"golang.org/x/net/html"
12)
13
14// Document represents an HTML document to be manipulated. Unlike jQuery, which
15// is loaded as part of a DOM document, and thus acts upon its containing
16// document, GoQuery doesn't know which HTML document to act upon. So it needs
17// to be told, and that's what the Document class is for. It holds the root
18// document node to manipulate, and can make selections on this document.
19type Document struct {
20	*Selection
21	Url      *url.URL
22	rootNode *html.Node
23}
24
25// NewDocumentFromNode is a Document constructor that takes a root html Node
26// as argument.
27func NewDocumentFromNode(root *html.Node) *Document {
28	return newDocument(root, nil)
29}
30
31// NewDocument is a Document constructor that takes a string URL as argument.
32// It loads the specified document, parses it, and stores the root Document
33// node, ready to be manipulated.
34//
35// Deprecated: Use the net/http standard library package to make the request
36// and validate the response before calling goquery.NewDocumentFromReader
37// with the response's body.
38func NewDocument(url string) (*Document, error) {
39	// Load the URL
40	res, e := http.Get(url)
41	if e != nil {
42		return nil, e
43	}
44	return NewDocumentFromResponse(res)
45}
46
47// NewDocumentFromReader returns a Document from an io.Reader.
48// It returns an error as second value if the reader's data cannot be parsed
49// as html. It does not check if the reader is also an io.Closer, the
50// provided reader is never closed by this call. It is the responsibility
51// of the caller to close it if required.
52func NewDocumentFromReader(r io.Reader) (*Document, error) {
53	root, e := html.Parse(r)
54	if e != nil {
55		return nil, e
56	}
57	return newDocument(root, nil), nil
58}
59
60// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
61// It loads the specified response's document, parses it, and stores the root Document
62// node, ready to be manipulated. The response's body is closed on return.
63//
64// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
65func NewDocumentFromResponse(res *http.Response) (*Document, error) {
66	if res == nil {
67		return nil, errors.New("Response is nil")
68	}
69	defer res.Body.Close()
70	if res.Request == nil {
71		return nil, errors.New("Response.Request is nil")
72	}
73
74	// Parse the HTML into nodes
75	root, e := html.Parse(res.Body)
76	if e != nil {
77		return nil, e
78	}
79
80	// Create and fill the document
81	return newDocument(root, res.Request.URL), nil
82}
83
84// CloneDocument creates a deep-clone of a document.
85func CloneDocument(doc *Document) *Document {
86	return newDocument(cloneNode(doc.rootNode), doc.Url)
87}
88
89// Private constructor, make sure all fields are correctly filled.
90func newDocument(root *html.Node, url *url.URL) *Document {
91	// Create and fill the document
92	d := &Document{nil, url, root}
93	d.Selection = newSingleSelection(root, d)
94	return d
95}
96
97// Selection represents a collection of nodes matching some criteria. The
98// initial Selection can be created by using Document.Find, and then
99// manipulated using the jQuery-like chainable syntax and methods.
100type Selection struct {
101	Nodes    []*html.Node
102	document *Document
103	prevSel  *Selection
104}
105
106// Helper constructor to create an empty selection
107func newEmptySelection(doc *Document) *Selection {
108	return &Selection{nil, doc, nil}
109}
110
111// Helper constructor to create a selection of only one node
112func newSingleSelection(node *html.Node, doc *Document) *Selection {
113	return &Selection{[]*html.Node{node}, doc, nil}
114}
115
116// Matcher is an interface that defines the methods to match
117// HTML nodes against a compiled selector string. Cascadia's
118// Selector implements this interface.
119type Matcher interface {
120	Match(*html.Node) bool
121	MatchAll(*html.Node) []*html.Node
122	Filter([]*html.Node) []*html.Node
123}
124
125// compileMatcher compiles the selector string s and returns
126// the corresponding Matcher. If s is an invalid selector string,
127// it returns a Matcher that fails all matches.
128func compileMatcher(s string) Matcher {
129	cs, err := cascadia.Compile(s)
130	if err != nil {
131		return invalidMatcher{}
132	}
133	return cs
134}
135
136// invalidMatcher is a Matcher that always fails to match.
137type invalidMatcher struct{}
138
139func (invalidMatcher) Match(n *html.Node) bool             { return false }
140func (invalidMatcher) MatchAll(n *html.Node) []*html.Node  { return nil }
141func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
142