1css_to_xpath <- function(selector, prefix = "descendant-or-self::", translator = "generic") { 2 if (missing(selector) || is.null(selector)) 3 stop("A valid selector (character vector) must be provided.") 4 5 if (!is.character(selector)) 6 stop("The 'selector' argument must be a character vector") 7 if (!is.character(prefix)) 8 stop("The 'prefix' argument must be a character vector") 9 if (!is.character(translator)) 10 stop("The 'translator' argument must be a character vector") 11 12 if (anyNA(selector)) { 13 warning("NA values were found in the 'selector' argument, they have been removed") 14 selector <- selector[!is.na(selector)] 15 } 16 17 if (anyNA(prefix)) { 18 warning("NA values were found in the 'prefix' argument, they have been removed") 19 prefix <- prefix[!is.na(prefix)] 20 } 21 22 if (anyNA(translator)) { 23 warning("NA values were found in the 'translator' argument, they have been removed") 24 translator <- translator[!is.na(translator)] 25 } 26 27 zeroLengthArgs <- character(0) 28 if (!length(selector)) 29 zeroLengthArgs <- c(zeroLengthArgs, "selector") 30 if (!length(prefix)) 31 zeroLengthArgs <- c(zeroLengthArgs, "prefix") 32 if (!length(translator)) 33 zeroLengthArgs <- c(zeroLengthArgs, "translator") 34 35 if (length(zeroLengthArgs)) { 36 plural <- if (length(zeroLengthArgs) > 1) "s" else "" 37 stop("Zero length character vector found for the following argument", 38 plural, 39 ": ", 40 paste0(zeroLengthArgs, collapse = ", ")) 41 } 42 43 translator <- sapply(translator, function(tran) { 44 match.arg(tolower(tran), c("generic", "html", "xhtml")) 45 }) 46 47 maxArgLength <- max(length(selector), length(prefix), length(translator)) 48 selector <- rep(selector, length.out = maxArgLength) 49 prefix <- rep(prefix, length.out = maxArgLength) 50 translator <- rep(translator, length.out = maxArgLength) 51 52 results <- character(maxArgLength) 53 for (i in seq_len(maxArgLength)) { 54 sel <- selector[i] 55 pref <- prefix[i] 56 trans <- translator[i] 57 58 tran <- if (trans == "html") { 59 HTMLTranslator$new() 60 } else if (trans == "xhtml") { 61 HTMLTranslator$new(xhtml = TRUE) 62 } else { 63 GenericTranslator$new() 64 } 65 66 results[i] <- tran$css_to_xpath(sel, pref) 67 } 68 69 as.character(results) 70} 71 72querySelector <- function(doc, selector, ns = NULL, ...) { 73 UseMethod("querySelector", doc) 74} 75 76querySelectorAll <- function(doc, selector, ns = NULL, ...) { 77 UseMethod("querySelectorAll", doc) 78} 79 80querySelectorNS <- function(doc, selector, ns, 81 prefix = "descendant-or-self::", ...) { 82 UseMethod("querySelectorNS", doc) 83} 84 85querySelectorAllNS <- function(doc, selector, ns, 86 prefix = "descendant-or-self::", ...) { 87 UseMethod("querySelectorAllNS", doc) 88} 89 90querySelector.default <- function(doc, selector, ns = NULL, ...) { 91 stop("The object given to querySelector() is not an 'XML' or 'xml2' document or node.") 92} 93 94querySelectorAll.default <- function(doc, selector, ns = NULL, ...) { 95 stop("The object given to querySelectorAll() is not an 'XML' or 'xml2' document or node.") 96} 97 98querySelectorNS.default <- function(doc, selector, ns, 99 prefix = "descendant-or-self::", ...) { 100 stop("The object given to querySelectorNS() is not an 'XML' or 'xml2' document or node.") 101} 102 103querySelectorAllNS.default <- function(doc, selector, ns, 104 prefix = "descendant-or-self::", ...) { 105 stop("The object given to querySelectorAllNS() is not an 'XML' or 'xml2' document or node.") 106} 107 108querySelector.XMLInternalNode <- 109querySelector.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) { 110 if (missing(selector)) 111 stop("A valid selector (character vector) must be provided.") 112 results <- querySelectorAll(doc, selector, ns, ...) 113 if (length(results)) 114 results[[1]] 115 else 116 NULL 117} 118 119querySelectorAll.XMLInternalNode <- function(doc, selector, ns = NULL, ...) { 120 if (missing(selector)) 121 stop("A valid selector (character vector) must be provided.") 122 xpath <- css_to_xpath(selector, ...) 123 if (!is.null(ns)) { 124 ns <- formatNS(ns) 125 XML::getNodeSet(doc, xpath, ns) 126 } else { 127 XML::getNodeSet(doc, xpath) 128 } 129} 130 131querySelectorAll.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) { 132 if (missing(selector)) 133 stop("A valid selector (character vector) must be provided.") 134 doc <- XML::xmlRoot(doc) 135 querySelectorAll(doc, selector, ns, ...) 136} 137 138querySelectorNS.XMLInternalNode <- 139querySelectorNS.XMLInternalDocument <- function(doc, selector, ns, 140 prefix = "descendant-or-self::", ...) { 141 if (missing(selector)) 142 stop("A valid selector (character vector) must be provided.") 143 if (missing(ns) || !length(ns)) 144 stop("A namespace must be provided.") 145 ns <- formatNS(ns) 146 prefix <- formatNSPrefix(ns, prefix) 147 querySelector(doc, selector, ns, prefix = prefix, ...) 148} 149 150querySelectorAllNS.XMLInternalNode <- 151querySelectorAllNS.XMLInternalDocument <- function(doc, selector, ns, 152 prefix = "descendant-or-self::", ...) { 153 if (missing(selector)) 154 stop("A valid selector (character vector) must be provided.") 155 if (missing(ns) || !length(ns)) 156 stop("A namespace must be provided.") 157 ns <- formatNS(ns) 158 prefix <- formatNSPrefix(ns, prefix) 159 querySelectorAll(doc, selector, ns, prefix = prefix, ...) 160} 161 162querySelector.xml_node <- function(doc, selector, ns = NULL, ...) { 163 if (missing(selector)) 164 stop("A valid selector (character vector) must be provided.") 165 if (is.null(ns)) 166 ns <- xml2::xml_ns(doc) 167 validateNS(ns) 168 xpath <- css_to_xpath(selector, ...) 169 result <- xml2::xml_find_first(doc, xpath, ns) 170 if (length(result)) 171 result 172 else 173 NULL 174} 175 176querySelectorAll.xml_node <- function(doc, selector, ns = NULL, ...) { 177 if (missing(selector)) 178 stop("A valid selector (character vector) must be provided.") 179 if (is.null(ns)) 180 ns <- xml2::xml_ns(doc) 181 validateNS(ns) 182 xpath <- css_to_xpath(selector, ...) 183 xml2::xml_find_all(doc, xpath, ns) 184} 185 186querySelectorNS.xml_node <- function(doc, selector, ns, 187 prefix = "descendant-or-self::", ...) { 188 if (missing(selector)) 189 stop("A valid selector (character vector) must be provided.") 190 if (missing(ns) || is.null(ns) || !length(ns)) 191 stop("A namespace must be provided.") 192 ns <- formatNS(ns) 193 prefix <- formatNSPrefix(ns, prefix) 194 querySelector(doc, selector, ns, prefix = prefix, ...) 195} 196 197querySelectorAllNS.xml_node <- function(doc, selector, ns, 198 prefix = "descendant-or-self::", ...) { 199 if (missing(selector)) 200 stop("A valid selector (character vector) must be provided.") 201 if (missing(ns) || is.null(ns) || !length(ns)) 202 stop("A namespace must be provided.") 203 ns <- formatNS(ns) 204 prefix <- formatNSPrefix(ns, prefix) 205 querySelectorAll(doc, selector, ns, prefix = prefix, ...) 206} 207 208# Takes a named vector or list and gives a named vector back 209formatNS <- function(ns) { 210 if (is.null(ns)) 211 return(NULL) 212 if (!is.list(ns) && !is.character(ns)) 213 stop("A namespace object must be either a named list or a named character vector.") 214 nsNames <- names(ns) 215 if (is.null(nsNames) || anyNA(nsNames) || !all(nzchar(nsNames))) 216 stop("The namespace object either missing some or all names for each element in its collection.") 217 ns <- unlist(ns) 218 if (!is.character(ns)) 219 stop("The values in the namespace object must be a character vector.") 220 names(ns) <- nsNames 221 ns 222} 223 224formatNSPrefix <- function(ns, prefix) { 225 filters <- paste0("//", names(ns), ":*", collapse = "|") 226 prefix <- paste0("(", filters, ")/", prefix) 227 prefix 228} 229 230# Checks whether a vector is a valid character vector for namespaces 231validateNS <- function(ns) { 232 if (!is.character(ns)) 233 stop("A namespace object must be comprised of characters") 234 nsNames <- names(ns) 235 if (is.null(nsNames) || anyNA(nsNames)) 236 stop("The namespace object either missing some or all names for each element in its collection.") 237} 238