1# File src/library/tools/R/parseRd.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2021 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19parse_Rd <- function(file, srcfile = NULL, encoding = "unknown", 20 verbose = FALSE, fragment = FALSE, 21 warningCalls = TRUE, 22 macros = file.path(R.home("share"), "Rd", "macros", "system.Rd"), 23 permissive = FALSE) 24{ 25 if(is.character(file)) { 26 file0 <- file 27 if(file == "") { 28 file <- stdin() 29 } else { 30 ## keep.source is FALSE in batch use 31 ## encoding issues here, for now use file encoding 32 if (missing(srcfile)) ## && isTRUE(getOption("keep.source"))) 33 srcfile <- srcfile(file) 34 } 35 } else file0 <- "<connection>" 36 lines <- readLines(file, warn = FALSE) 37 if(is.character(macros)) 38 macros <- initialRdMacros(macros = macros) 39 ## remove old-style marking for data, keep line nos 40 lines[lines == "\\non_function{}"] <- "" 41 ## Extract the encoding if marked in the file: 42 ## do this in two steps to minimize warnings in MBCS locales 43 ## Note this is required to be on a line by itself, 44 ## but some people have preceding whitespace 45 enc <- grep("\\encoding{", lines, fixed = TRUE, useBytes=TRUE, value=TRUE) 46 enc <- grep("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", enc, value = TRUE) 47 if(length(enc)) { 48 if(length(enc) > 1L) 49 warning(file0, ": multiple \\encoding lines, using the first", 50 domain = NA, call. = warningCalls) 51 ## keep first one 52 enc <- enc[1L] 53 enc <- sub("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", "\\1", enc) 54 if(verbose) message("found encoding ", enc, domain = NA) 55 encoding <- if(enc %in% c("UTF-8", "utf-8", "utf8")) "UTF-8" else enc 56 } 57 if (encoding == "unknown") encoding <- "" 58 59 ## the internal function must get some sort of srcfile 60 if (!inherits(srcfile, "srcfile")) 61 srcfile <- srcfile(file0) 62 basename <- basename(srcfile$filename) 63 srcfile$encoding <- encoding 64 srcfile$Enc <- "UTF-8" 65 66 if (encoding == "ASCII") { 67 if (anyNA(iconv(lines, "", "ASCII"))) 68 stop(file0, ": non-ASCII input and no declared encoding", 69 domain = NA, call. = warningCalls) 70 } else { 71 if (encoding != "UTF-8") 72 lines <- iconv(lines, encoding, "UTF-8", sub = "byte") 73 ## Strip UTF-8 BOM if necessary. 74 bytes <- charToRaw(lines[1L]) 75 if(identical(as.integer(bytes[1L : 3L]), 76 c(0xefL, 0xbbL, 0xbfL))) 77 lines[1L] <- rawToChar(bytes[-(1L : 3L)]) 78 } 79 80 tcon <- file() 81 writeLines(lines, tcon, useBytes = TRUE) 82 on.exit(close(tcon)) 83 84 warndups <- config_val_to_logical(Sys.getenv("_R_WARN_DUPLICATE_RD_MACROS_", "FALSE")) 85 86 result <- if(permissive) 87 ## FIXME: this should test for a special class of warning rather than testing the 88 ## message, but those are currently not easily generated from C code. 89 withCallingHandlers(.External2(C_parseRd, tcon, srcfile, "UTF-8", 90 verbose, basename, fragment, 91 warningCalls, macros, warndups), 92 warning = function(w) 93 if (grepl("unknown macro", conditionMessage(w))) 94 tryInvokeRestart("muffleWarning")) 95 else 96 .External2(C_parseRd, tcon, srcfile, "UTF-8", 97 verbose, basename, fragment, warningCalls, 98 macros, warndups) 99 result <- expandDynamicFlags(result) 100 if (permissive) 101 permissify(result) 102 else 103 result 104} 105 106print.Rd <- function(x, deparse = FALSE, ...) 107{ 108 cat(as.character.Rd(x, deparse = deparse), sep = "") 109 invisible(x) 110} 111 112as.character.Rd <- function(x, deparse = FALSE, ...) 113{ 114 ZEROARG <- c("\\cr", "\\dots", "\\ldots", "\\R", "\\tab") # Only these cause trouble when {} is added 115 TWOARG <- c("\\section", "\\subsection", "\\item", "\\enc", 116 "\\method", "\\S3method", "\\S4method", "\\tabular", 117 "\\if", "\\href") 118 USERMACROS <- c("USERMACRO", "\\newcommand", "\\renewcommand") 119 EQN <- c("\\deqn", "\\eqn", "\\figure") 120 modes <- c(RLIKE = 1L, LATEXLIKE = 2L, VERBATIM = 3L, INOPTION = 4L, COMMENTMODE = 5L, UNKNOWNMODE = 6L) 121 tags <- c(RCODE = 1L, TEXT = 2L, VERB = 3L, COMMENT = 5L, UNKNOWN = 6L) 122 state <- c(braceDepth = 0L, inRString = 0L) 123 needBraces <- FALSE # if next character is alphabetic, separate by braces. 124 inEqn <- 0L 125 126 pr <- function(x, quoteBraces) { 127 tag <- attr(x, "Rd_tag") 128 if (is.null(tag) || tag == "LIST") tag <- "" 129 if (is.list(x)) { 130 savestate <- state 131 state <<- c(0L, 0L) 132 needBraces <<- FALSE 133 if (tag == "Rd") { # a whole file 134 result <- character() 135 for (i in seq_along(x)) 136 result <- c(result, pr(x[[i]], quoteBraces)) 137 } else if (startsWith(tag, "#")) { 138 if (deparse) { 139 dep <- deparseRdElement(x[[1L]][[1L]], 140 c(state, modes["LATEXLIKE"], 141 inEqn, 142 as.integer(quoteBraces))) 143 result <- c(tag, dep[[1L]]) 144 } else 145 result <- c(tag, x[[1L]][[1L]]) 146 for (i in seq_along(x[[2L]])) 147 result <- c(result, pr(x[[2L]][[i]], quoteBraces)) 148 result <- c(result, "#endif\n") 149 } else if (tag %in% ZEROARG) { 150 result <- tag 151 needBraces <<- TRUE 152 } else if (tag %in% TWOARG) { 153 result <- tag 154 for (i in seq_along(x)) 155 result <- c(result, pr(x[[i]], quoteBraces)) 156 } else if (tag %in% EQN) { 157 result <- tag 158 inEqn <<- 1L 159 result <- c(result, pr(x[[1L]], quoteBraces)) 160 inEqn <<- 0L 161 if (length(x) > 1L) 162 result <- c(result, pr(x[[2L]], quoteBraces)) 163 } else { 164 result <- tag 165 if (!is.null(option <- attr(x, "Rd_option"))) 166 result <- c(result, "[", pr(option, quoteBraces), "]") 167 result <- c(result, "{") 168 for (i in seq_along(x)) 169 result <- c(result, pr(x[[i]], quoteBraces)) 170 result <- c(result, "}") 171 } 172 if (state[1L]) # If braces didn't match within the list, try again, quoting them 173 result <- pr(x, TRUE) 174 state <<- savestate 175 } else if (tag %in% USERMACROS) { 176 result <- c() 177 } else { 178 if (deparse) { 179 dep <- deparseRdElement(as.character(x), c(state, tags[tag], inEqn, as.integer(quoteBraces))) 180 result <- dep[[1L]] 181 state <<- dep[[2L]][1L:2L] 182 } else { 183 if (inherits(x, "Rd")) 184 class(x) <- setdiff(class(x), "Rd") # Avoid infinite recursion from misuse (PR#16448) 185 result <- as.character(x) 186 } 187 if (needBraces) { 188 if (grepl("^[[:alpha:]]", result)) result <- c("{}", result) 189 needBraces <<- FALSE 190 } 191 } 192 result 193 } 194 if (is.null(attr(x, "Rd_tag"))) attr(x, "Rd_tag") <- "Rd" 195 pr(x, quoteBraces = FALSE) 196} 197 198deparseRdElement <- function(element, state) 199 .Call(C_deparseRd, element, state) 200 201# Convert unknown tags into text displaying the tag with braces if necessary 202# This allows unknown LateX macros to be embedded in the text, and to be just passed 203# through. 204 205permissify <- function(Rd) 206{ 207 tags <- RdTags(Rd) 208 oldclass <- class(Rd) 209 oldsrcref <- utils::getSrcref(Rd) 210 oldtag <- attr(Rd, "Rd_tag") 211 i <- 0 212 while (i < length(tags)) { 213 i <- i+1 214 if (tags[i] == "UNKNOWN") { 215 Rd[[i]] <- tagged(Rd[[i]], "TEXT", utils::getSrcref(Rd[[i]])) 216 while (i < length(tags)) { 217 if (tags[i+1] == "LIST") { 218 Rd <- c(Rd[seq_len(i)], 219 list(tagged("{", "TEXT", utils::getSrcref(Rd[[i+1]]))), 220 permissify(Rd[[i+1]]), 221 list(tagged("}", "TEXT", utils::getSrcref(Rd[[i+1]]))), 222 Rd[seq_along(Rd)[-seq_len(i+1)]]) 223 tags <- RdTags(Rd) 224 i <- i+3 225 } else if (tags[i+1] == "TEXT" && grepl("^ *$", Rd[[i+1]])) 226 i <- i + 1 227 else 228 break 229 } 230 } else if (is.recursive(Rd[[i]])) 231 Rd[[i]] <- permissify(Rd[[i]]) 232 } 233 class(Rd) <- oldclass 234 attr(Rd, "srcref") <- oldsrcref 235 attr(Rd, "Rd_tag") <- oldtag 236 Rd 237} 238