1package goquery
2
3import (
4	"errors"
5	"io"
6	"net/http"
7	"net/url"
8
9	"github.com/andybalholm/cascadia"
10	"golang.org/x/net/html"
11)
12
// Document represents an HTML document to be manipulated. Unlike jQuery, which
// is loaded as part of a DOM document, and thus acts upon its containing
// document, GoQuery doesn't know which HTML document to act upon. So it needs
// to be told, and that's what the Document class is for. It holds the root
// document node to manipulate, and can make selections on this document.
type Document struct {
	// The embedded Selection is set by the package constructors to a
	// selection containing only the root node, so Selection methods can be
	// called directly on a Document.
	*Selection
	// Url is the request URL when the Document is built from an http
	// response; the node- and reader-based constructors leave it nil.
	Url      *url.URL
	rootNode *html.Node // root of the parsed HTML tree
}
23
24// NewDocumentFromNode is a Document constructor that takes a root html Node
25// as argument.
26func NewDocumentFromNode(root *html.Node) *Document {
27	return newDocument(root, nil)
28}
29
30// NewDocument is a Document constructor that takes a string URL as argument.
31// It loads the specified document, parses it, and stores the root Document
32// node, ready to be manipulated.
33//
34// Deprecated: Use the net/http standard library package to make the request
35// and validate the response before calling goquery.NewDocumentFromReader
36// with the response's body.
37func NewDocument(url string) (*Document, error) {
38	// Load the URL
39	res, e := http.Get(url)
40	if e != nil {
41		return nil, e
42	}
43	return NewDocumentFromResponse(res)
44}
45
46// NewDocumentFromReader returns a Document from an io.Reader.
47// It returns an error as second value if the reader's data cannot be parsed
48// as html. It does not check if the reader is also an io.Closer, the
49// provided reader is never closed by this call. It is the responsibility
50// of the caller to close it if required.
51func NewDocumentFromReader(r io.Reader) (*Document, error) {
52	root, e := html.Parse(r)
53	if e != nil {
54		return nil, e
55	}
56	return newDocument(root, nil), nil
57}
58
59// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
60// It loads the specified response's document, parses it, and stores the root Document
61// node, ready to be manipulated. The response's body is closed on return.
62//
63// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
64func NewDocumentFromResponse(res *http.Response) (*Document, error) {
65	if res == nil {
66		return nil, errors.New("Response is nil")
67	}
68	defer res.Body.Close()
69	if res.Request == nil {
70		return nil, errors.New("Response.Request is nil")
71	}
72
73	// Parse the HTML into nodes
74	root, e := html.Parse(res.Body)
75	if e != nil {
76		return nil, e
77	}
78
79	// Create and fill the document
80	return newDocument(root, res.Request.URL), nil
81}
82
83// CloneDocument creates a deep-clone of a document.
84func CloneDocument(doc *Document) *Document {
85	return newDocument(cloneNode(doc.rootNode), doc.Url)
86}
87
88// Private constructor, make sure all fields are correctly filled.
89func newDocument(root *html.Node, url *url.URL) *Document {
90	// Create and fill the document
91	d := &Document{nil, url, root}
92	d.Selection = newSingleSelection(root, d)
93	return d
94}
95
// Selection represents a collection of nodes matching some criteria. The
// initial Selection can be created by using Document.Find, and then
// manipulated using the jQuery-like chainable syntax and methods.
type Selection struct {
	Nodes    []*html.Node // nodes that make up the selection; nil when empty
	document *Document    // Document the selection was created for
	prevSel  *Selection   // nil in new selections; presumably the selection this one derives from (confirm with callers)
}
104
105// Helper constructor to create an empty selection
106func newEmptySelection(doc *Document) *Selection {
107	return &Selection{nil, doc, nil}
108}
109
110// Helper constructor to create a selection of only one node
111func newSingleSelection(node *html.Node, doc *Document) *Selection {
112	return &Selection{[]*html.Node{node}, doc, nil}
113}
114
// Matcher is an interface that defines the methods to match
// HTML nodes against a compiled selector string. Cascadia's
// Selector implements this interface.
type Matcher interface {
	// Match reports whether the given node matches.
	Match(*html.Node) bool
	// MatchAll returns the matching nodes found from the given node; it is
	// the method used when a Matcher selects nodes (see Single).
	MatchAll(*html.Node) []*html.Node
	// Filter returns the nodes kept from the given slice; presumably those
	// that match (confirm against the cascadia implementation).
	Filter([]*html.Node) []*html.Node
}
123
124// Single compiles a selector string to a Matcher that stops after the first
125// match is found.
126//
127// By default, Selection.Find and other functions that accept a selector string
128// to select nodes will use all matches corresponding to that selector. By
129// using the Matcher returned by Single, at most the first match will be
130// selected.
131//
132// For example, those two statements are semantically equivalent:
133//
134//     sel1 := doc.Find("a").First()
135//     sel2 := doc.FindMatcher(goquery.Single("a"))
136//
137// The one using Single is optimized to be potentially much faster on large
138// documents.
139//
140// Only the behaviour of the MatchAll method of the Matcher interface is
141// altered compared to standard Matchers. This means that the single-selection
142// property of the Matcher only applies for Selection methods where the Matcher
143// is used to select nodes, not to filter or check if a node matches the
144// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
145// FilterMatcher(Single("div")) will still result in a Selection with multiple
146// "div"s if there were many "div"s in the Selection to begin with).
147func Single(selector string) Matcher {
148	return singleMatcher{compileMatcher(selector)}
149}
150
151// SingleMatcher returns a Matcher matches the same nodes as m, but that stops
152// after the first match is found.
153//
154// See the documentation of function Single for more details.
155func SingleMatcher(m Matcher) Matcher {
156	if _, ok := m.(singleMatcher); ok {
157		// m is already a singleMatcher
158		return m
159	}
160	return singleMatcher{m}
161}
162
163// compileMatcher compiles the selector string s and returns
164// the corresponding Matcher. If s is an invalid selector string,
165// it returns a Matcher that fails all matches.
166func compileMatcher(s string) Matcher {
167	cs, err := cascadia.Compile(s)
168	if err != nil {
169		return invalidMatcher{}
170	}
171	return cs
172}
173
174type singleMatcher struct {
175	Matcher
176}
177
178func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
179	// Optimized version - stops finding at the first match (cascadia-compiled
180	// matchers all use this code path).
181	if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
182		node := mm.MatchFirst(n)
183		if node == nil {
184			return nil
185		}
186		return []*html.Node{node}
187	}
188
189	// Fallback version, for e.g. test mocks that don't provide the MatchFirst
190	// method.
191	nodes := m.Matcher.MatchAll(n)
192	if len(nodes) > 0 {
193		return nodes[:1:1]
194	}
195	return nil
196}
197
198// invalidMatcher is a Matcher that always fails to match.
199type invalidMatcher struct{}
200
201func (invalidMatcher) Match(n *html.Node) bool             { return false }
202func (invalidMatcher) MatchAll(n *html.Node) []*html.Node  { return nil }
203func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
204