1#' @useDynLib xml2, .registration = TRUE 2NULL 3 4# node ------------------------------------------------------------------------- 5 6xml_node <- function(node = NULL, doc = NULL) { 7 if (inherits(node, "xml_node")) { 8 node 9 } else { 10 structure(list(node = node, doc = doc), class = "xml_node") 11 } 12} 13 14#' @export 15as.character.xml_node <- function(x, ..., options = "format", encoding = "UTF-8") { 16 options <- parse_options(options, xml_save_options()) 17 .Call(node_write_character, x$node, encoding, options) 18} 19 20#' @export 21print.xml_node <- function(x, width = getOption("width"), max_n = 20, ...) { 22 cat("{", doc_type(x), "_node}\n", sep = "") 23 cat(format(x), "\n", sep = "") 24 show_nodes(xml_children(x), width = width, max_n = max_n) 25} 26 27#' @export 28print.xml_missing <- function(x, width = getOption("width"), max_n = 20, ...) { 29 cat("{xml_missing}\n") 30 cat(format(x), "\n", sep = "") 31} 32 33# document --------------------------------------------------------------------- 34 35xml_document <- function(doc) { 36 if (.Call(doc_has_root, doc)) { 37 x <- xml_node(.Call(doc_root, doc), doc) 38 class(x) <- c("xml_document", class(x)) 39 x 40 } else { 41 structure(list(doc = doc), class = "xml_document") 42 } 43} 44 45doc_type <- function(x) { 46 if (is.null(x$doc)) { 47 return("xml") 48 } 49 if (.Call(doc_is_html, x$doc)) { 50 "html" 51 } else { 52 "xml" 53 } 54} 55 56#' @export 57print.xml_document <- function(x, width = getOption("width"), max_n = 20, ...) { 58 doc <- xml_document(x$doc) 59 cat("{", doc_type(x), "_document}\n", sep = "") 60 if (inherits(doc, "xml_node")) { 61 cat(format(doc), "\n", sep = "") 62 show_nodes(xml_children(doc), width = width, max_n = max_n) 63 } 64} 65 66#' @export 67as.character.xml_document <- function(x, ..., options = "format", encoding = "UTF-8") { 68 options <- parse_options(options, xml_save_options()) 69 .Call(doc_write_character, x$doc, encoding, options) 70} 71 72# nodeset ---------------------------------------------------------------------- 73 74xml_nodeset <- function(nodes = list(), deduplicate = TRUE) { 75 if (isTRUE(deduplicate)) { 76 nodes <- nodes[!.Call(nodes_duplicated, nodes)] 77 } 78 structure(nodes, class = "xml_nodeset") 79} 80 81#' @param nodes A list (possible nested) of external pointers to nodes 82#' @return a nodeset 83#' @noRd 84make_nodeset <- function(nodes, doc) { 85 nodes <- unlist(nodes, recursive = FALSE) 86 87 xml_nodeset(lapply(nodes, xml_node, doc = doc)) 88} 89 90#' @export 91print.xml_nodeset <- function(x, width = getOption("width"), max_n = 20, ...) { 92 n <- length(x) 93 cat("{", doc_type(x), "_nodeset (", n, ")}\n", sep = "") 94 95 if (n > 0) 96 show_nodes(x, width = width, max_n = max_n) 97} 98 99#' @export 100as.character.xml_nodeset <- function(x, ...) { 101 vapply(x, as.character, FUN.VALUE = character(1)) 102} 103 104#' @export 105`[.xml_nodeset` <- function(x, i, ...) { 106 if (length(x) == 0) { 107 return(x) 108 } 109 xml_nodeset(NextMethod()) 110} 111 112show_nodes <- function(x, width = getOption("width"), max_n = 20) { 113 stopifnot(inherits(x, "xml_nodeset")) 114 115 n <- length(x) 116 if (n == 0) 117 return() 118 119 if (n > max_n) { 120 n <- max_n 121 x <- x[seq_len(n)] 122 trunc <- TRUE 123 } else { 124 trunc <- FALSE 125 } 126 127 label <- format(paste0("[", seq_len(n), "]"), justify = "right") 128 contents <- encodeString(vapply(x, as.character, FUN.VALUE = character(1))) 129 130 desc <- paste0(label, " ", contents) 131 needs_trunc <- nchar(desc) > width 132 desc[needs_trunc] <- paste(substr(desc[needs_trunc], 1, width - 3), "...") 133 134 cat(desc, sep = "\n") 135 if (trunc) { 136 cat("...\n") 137 } 138 invisible() 139} 140 141 142nodeset_apply <- function(x, fun, ...) UseMethod("nodeset_apply") 143 144#' @export 145nodeset_apply.xml_missing <- function(x, fun, ...) { 146 xml_nodeset() 147} 148 149#' @export 150nodeset_apply.xml_nodeset <- function(x, fun, ...) { 151 if (length(x) == 0) 152 return(xml_nodeset()) 153 154 is_missing <- is.na(x) 155 res <- list(length(x)) 156 157 res[is_missing] <- list(xml_missing()) 158 if (any(!is_missing)) { 159 res[!is_missing] <- lapply(x[!is_missing], function(x) fun(x$node, ...)) 160 } 161 162 make_nodeset(res, x[[1]]$doc) 163} 164 165#' @export 166nodeset_apply.xml_node <- function(x, fun, ...) { 167 nodes <- fun(x$node, ...) 168 xml_nodeset(lapply(nodes, xml_node, doc = x$doc)) 169} 170 171#' @export 172nodeset_apply.xml_document <- function(x, fun, ...) { 173 if (inherits(x, "xml_node")) { 174 NextMethod() 175 } else { 176 xml_nodeset() 177 } 178} 179 180#' @export 181format.xml_node <- function(x, ...) { 182 attrs <- xml_attrs(x) 183 paste("<", 184 paste( 185 c(xml_name(x), 186 format_attributes(attrs)), 187 collapse = " "), 188 ">", sep = "") 189} 190 191format_attributes <- function(x) { 192 if (length(x) == 0) { 193 character(0) 194 } else { 195 paste(names(x), quote_str(x), sep = "=") 196 } 197} 198 199#' Construct an missing xml object 200#' @export 201#' @keywords internal 202xml_missing <- function() { 203 structure(list(), class = "xml_missing") 204} 205 206#' @export 207is.na.xml_missing <- function(x) { 208 TRUE 209} 210 211#' @export 212is.na.xml_nodeset <- function(x) { 213 vapply(x, is.na, logical(1)) 214} 215 216#' @export 217is.na.xml_node <- function(x) { 218 FALSE 219} 220 221format.xml_missing <- function(x, ...) { 222 "<NA>" 223} 224 225#' @export 226as.character.xml_missing <- function(x, ...) { 227 NA_character_ 228} 229 230# These mimic the behavior of NA[[1]], NA[[2]], NA[1], NA[2] 231 232#' @export 233`[.xml_missing` <- function(x, i, ...) x 234 235#' @export 236`[[.xml_missing` <- function(x, i, ...) if (i == 1L) x else stop("subscript out of bounds") 237 238#' Construct a cdata node 239#' @param content The CDATA content, does not include `<![CDATA[` 240#' @examples 241#' x <- xml_new_root("root") 242#' xml_add_child(x, xml_cdata("<d/>")) 243#' as.character(x) 244#' @export 245xml_cdata <- function(content) { 246 structure(content, class = "xml_cdata") 247} 248 249#' Construct a comment node 250#' @param content The comment content 251#' @examples 252#' x <- xml_new_document() 253#' r <- xml_add_child(x, "root") 254#' xml_add_child(r, xml_comment("Hello!")) 255#' as.character(x) 256#' @export 257xml_comment <- function(content) { 258 structure(content, class = "xml_comment") 259} 260 261#' Construct a document type definition 262#' 263#' This is used to create simple document type definitions. If you need to 264#' create a more complicated definition with internal subsets it is recommended 265#' to parse a string directly with `read_xml()`. 266#' @param name The name of the declaration 267#' @param external_id The external ID of the declaration 268#' @param system_id The system ID of the declaration 269#' @examples 270#' r <- xml_new_root( 271#' xml_dtd("html", 272#' "-//W3C//DTD XHTML 1.0 Transitional//EN", 273#' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd")) 274#' 275#' # Use read_xml directly for more complicated DTD 276#' d <- read_xml( 277#' '<!DOCTYPE doc [ 278#' <!ELEMENT doc (#PCDATA)> 279#' <!ENTITY foo " test "> 280#' ]> 281#' <doc>This is a valid document &foo; !</doc>') 282#' @export 283xml_dtd <- function(name = "", external_id = "", system_id = "") { 284 structure(list(name = name, external_id = external_id, system_id = system_id), class = "xml_dtd") 285} 286