1css_to_xpath <- function(selector, prefix = "descendant-or-self::", translator = "generic") {
2    if (missing(selector) || is.null(selector))
3        stop("A valid selector (character vector) must be provided.")
4
5    if (!is.character(selector))
6        stop("The 'selector' argument must be a character vector")
7    if (!is.character(prefix))
8        stop("The 'prefix' argument must be a character vector")
9    if (!is.character(translator))
10        stop("The 'translator' argument must be a character vector")
11
12    if (anyNA(selector)) {
13        warning("NA values were found in the 'selector' argument, they have been removed")
14        selector <- selector[!is.na(selector)]
15    }
16
17    if (anyNA(prefix)) {
18        warning("NA values were found in the 'prefix' argument, they have been removed")
19        prefix <- prefix[!is.na(prefix)]
20    }
21
22    if (anyNA(translator)) {
23        warning("NA values were found in the 'translator' argument, they have been removed")
24        translator <- translator[!is.na(translator)]
25    }
26
27    zeroLengthArgs <- character(0)
28    if (!length(selector))
29        zeroLengthArgs <- c(zeroLengthArgs, "selector")
30    if (!length(prefix))
31        zeroLengthArgs <- c(zeroLengthArgs, "prefix")
32    if (!length(translator))
33        zeroLengthArgs <- c(zeroLengthArgs, "translator")
34
35    if (length(zeroLengthArgs)) {
36        plural <- if (length(zeroLengthArgs) > 1) "s" else ""
37        stop("Zero length character vector found for the following argument",
38             plural,
39             ": ",
40             paste0(zeroLengthArgs, collapse = ", "))
41    }
42
43    translator <- sapply(translator, function(tran) {
44        match.arg(tolower(tran), c("generic", "html", "xhtml"))
45    })
46
47    maxArgLength <- max(length(selector), length(prefix), length(translator))
48    selector <- rep(selector, length.out = maxArgLength)
49    prefix <- rep(prefix, length.out = maxArgLength)
50    translator <- rep(translator, length.out = maxArgLength)
51
52    results <- character(maxArgLength)
53    for (i in seq_len(maxArgLength)) {
54        sel <- selector[i]
55        pref <- prefix[i]
56        trans <- translator[i]
57
58        tran <- if (trans == "html") {
59            HTMLTranslator$new()
60        } else if (trans == "xhtml") {
61            HTMLTranslator$new(xhtml = TRUE)
62        } else {
63            GenericTranslator$new()
64        }
65
66        results[i] <- tran$css_to_xpath(sel, pref)
67    }
68
69    as.character(results)
70}
71
72querySelector <- function(doc, selector, ns = NULL, ...) {
73    UseMethod("querySelector", doc)
74}
75
76querySelectorAll <- function(doc, selector, ns = NULL, ...) {
77    UseMethod("querySelectorAll", doc)
78}
79
80querySelectorNS <- function(doc, selector, ns,
81                            prefix = "descendant-or-self::", ...) {
82    UseMethod("querySelectorNS", doc)
83}
84
85querySelectorAllNS <- function(doc, selector, ns,
86                               prefix = "descendant-or-self::", ...) {
87    UseMethod("querySelectorAllNS", doc)
88}
89
90querySelector.default <- function(doc, selector, ns = NULL, ...) {
91    stop("The object given to querySelector() is not an 'XML' or 'xml2' document or node.")
92}
93
94querySelectorAll.default <- function(doc, selector, ns = NULL, ...) {
95    stop("The object given to querySelectorAll() is not an 'XML' or 'xml2' document or node.")
96}
97
98querySelectorNS.default <- function(doc, selector, ns,
99                                    prefix = "descendant-or-self::", ...) {
100    stop("The object given to querySelectorNS() is not an 'XML' or 'xml2' document or node.")
101}
102
103querySelectorAllNS.default <- function(doc, selector, ns,
104                                    prefix = "descendant-or-self::", ...) {
105    stop("The object given to querySelectorAllNS() is not an 'XML' or 'xml2' document or node.")
106}
107
108querySelector.XMLInternalNode     <-
109querySelector.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) {
110    if (missing(selector))
111        stop("A valid selector (character vector) must be provided.")
112    results <- querySelectorAll(doc, selector, ns, ...)
113    if (length(results))
114        results[[1]]
115    else
116        NULL
117}
118
119querySelectorAll.XMLInternalNode <- function(doc, selector, ns = NULL, ...) {
120    if (missing(selector))
121        stop("A valid selector (character vector) must be provided.")
122    xpath <- css_to_xpath(selector, ...)
123    if (!is.null(ns)) {
124        ns <- formatNS(ns)
125        XML::getNodeSet(doc, xpath, ns)
126    } else {
127        XML::getNodeSet(doc, xpath)
128    }
129}
130
131querySelectorAll.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) {
132    if (missing(selector))
133        stop("A valid selector (character vector) must be provided.")
134    doc <- XML::xmlRoot(doc)
135    querySelectorAll(doc, selector, ns, ...)
136}
137
138querySelectorNS.XMLInternalNode     <-
139querySelectorNS.XMLInternalDocument <- function(doc, selector, ns,
140                                                prefix = "descendant-or-self::", ...) {
141    if (missing(selector))
142        stop("A valid selector (character vector) must be provided.")
143    if (missing(ns) || !length(ns))
144        stop("A namespace must be provided.")
145    ns <- formatNS(ns)
146    prefix <- formatNSPrefix(ns, prefix)
147    querySelector(doc, selector, ns, prefix = prefix, ...)
148}
149
150querySelectorAllNS.XMLInternalNode     <-
151querySelectorAllNS.XMLInternalDocument <- function(doc, selector, ns,
152                                                   prefix = "descendant-or-self::", ...) {
153    if (missing(selector))
154        stop("A valid selector (character vector) must be provided.")
155    if (missing(ns) || !length(ns))
156        stop("A namespace must be provided.")
157    ns <- formatNS(ns)
158    prefix <- formatNSPrefix(ns, prefix)
159    querySelectorAll(doc, selector, ns, prefix = prefix, ...)
160}
161
162querySelector.xml_node <- function(doc, selector, ns = NULL, ...) {
163    if (missing(selector))
164        stop("A valid selector (character vector) must be provided.")
165    if (is.null(ns))
166        ns <- xml2::xml_ns(doc)
167    validateNS(ns)
168    xpath <- css_to_xpath(selector, ...)
169    result <- xml2::xml_find_first(doc, xpath, ns)
170    if (length(result))
171        result
172    else
173        NULL
174}
175
176querySelectorAll.xml_node <- function(doc, selector, ns = NULL, ...) {
177    if (missing(selector))
178        stop("A valid selector (character vector) must be provided.")
179    if (is.null(ns))
180        ns <- xml2::xml_ns(doc)
181    validateNS(ns)
182    xpath <- css_to_xpath(selector, ...)
183    xml2::xml_find_all(doc, xpath, ns)
184}
185
186querySelectorNS.xml_node <- function(doc, selector, ns,
187                                     prefix = "descendant-or-self::", ...) {
188    if (missing(selector))
189        stop("A valid selector (character vector) must be provided.")
190    if (missing(ns) || is.null(ns) || !length(ns))
191        stop("A namespace must be provided.")
192    ns <- formatNS(ns)
193    prefix <- formatNSPrefix(ns, prefix)
194    querySelector(doc, selector, ns, prefix = prefix, ...)
195}
196
197querySelectorAllNS.xml_node <- function(doc, selector, ns,
198                                        prefix = "descendant-or-self::", ...) {
199    if (missing(selector))
200        stop("A valid selector (character vector) must be provided.")
201    if (missing(ns) || is.null(ns) || !length(ns))
202        stop("A namespace must be provided.")
203    ns <- formatNS(ns)
204    prefix <- formatNSPrefix(ns, prefix)
205    querySelectorAll(doc, selector, ns, prefix = prefix, ...)
206}
207
208# Takes a named vector or list and gives a named vector back
209formatNS <- function(ns) {
210    if (is.null(ns))
211        return(NULL)
212    if (!is.list(ns) && !is.character(ns))
213        stop("A namespace object must be either a named list or a named character vector.")
214    nsNames <- names(ns)
215    if (is.null(nsNames) || anyNA(nsNames) || !all(nzchar(nsNames)))
216        stop("The namespace object either missing some or all names for each element in its collection.")
217    ns <- unlist(ns)
218    if (!is.character(ns))
219        stop("The values in the namespace object must be a character vector.")
220    names(ns) <- nsNames
221    ns
222}
223
224formatNSPrefix <- function(ns, prefix) {
225    filters <- paste0("//", names(ns), ":*", collapse = "|")
226    prefix <- paste0("(", filters, ")/", prefix)
227    prefix
228}
229
230# Checks whether a vector is a valid character vector for namespaces
231validateNS <- function(ns) {
232    if (!is.character(ns))
233        stop("A namespace object must be comprised of characters")
234    nsNames <- names(ns)
235    if (is.null(nsNames) || anyNA(nsNames))
236        stop("The namespace object either missing some or all names for each element in its collection.")
237}
238