1#  File src/library/tools/R/parseRd.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2021 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19parse_Rd <- function(file, srcfile = NULL, encoding = "unknown",
20                     verbose = FALSE, fragment = FALSE,
21                     warningCalls = TRUE,
22                     macros = file.path(R.home("share"), "Rd", "macros", "system.Rd"),
23		     permissive = FALSE)
24{
25    if(is.character(file)) {
26        file0 <- file
27        if(file == "") {
28            file <- stdin()
29        } else {
30            ## keep.source is FALSE in batch use
31            ## encoding issues here, for now use file encoding
32            if (missing(srcfile)) ## && isTRUE(getOption("keep.source")))
33                srcfile <- srcfile(file)
34        }
35    } else file0 <- "<connection>"
36    lines <- readLines(file, warn = FALSE)
37    if(is.character(macros))
38    	macros <- initialRdMacros(macros = macros)
39    ## remove old-style marking for data, keep line nos
40    lines[lines == "\\non_function{}"] <- ""
41    ## Extract the encoding if marked in the file:
42    ## do this in two steps to minimize warnings in MBCS locales
43    ## Note this is required to be on a line by itself,
44    ## but some people have preceding whitespace
45    enc <- grep("\\encoding{", lines, fixed = TRUE, useBytes=TRUE, value=TRUE)
46    enc <- grep("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", enc, value = TRUE)
47    if(length(enc)) {
48        if(length(enc) > 1L)
49            warning(file0, ": multiple \\encoding lines, using the first",
50                    domain = NA, call. = warningCalls)
51        ## keep first one
52        enc <- enc[1L]
53        enc <- sub("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", "\\1", enc)
54        if(verbose) message("found encoding ", enc, domain = NA)
55        encoding <- if(enc %in% c("UTF-8", "utf-8", "utf8")) "UTF-8" else enc
56    }
57    if (encoding == "unknown") encoding <- ""
58
59    ## the internal function must get some sort of srcfile
60    if (!inherits(srcfile, "srcfile"))
61    	srcfile <- srcfile(file0)
62    basename <- basename(srcfile$filename)
63    srcfile$encoding <- encoding
64    srcfile$Enc <- "UTF-8"
65
66    if (encoding == "ASCII") {
67        if (anyNA(iconv(lines, "", "ASCII")))
68            stop(file0, ": non-ASCII input and no declared encoding",
69                 domain = NA, call. = warningCalls)
70    } else {
71	if (encoding != "UTF-8")
72    	    lines <- iconv(lines, encoding, "UTF-8", sub = "byte")
73        ## Strip UTF-8 BOM if necessary.
74        bytes <- charToRaw(lines[1L])
75        if(identical(as.integer(bytes[1L : 3L]),
76                     c(0xefL, 0xbbL, 0xbfL)))
77            lines[1L] <- rawToChar(bytes[-(1L : 3L)])
78    }
79
80    tcon <- file()
81    writeLines(lines, tcon, useBytes = TRUE)
82    on.exit(close(tcon))
83
84    warndups <- config_val_to_logical(Sys.getenv("_R_WARN_DUPLICATE_RD_MACROS_", "FALSE"))
85
86    result <- if(permissive)
87                  ## FIXME:  this should test for a special class of warning rather than testing the
88                  ##         message, but those are currently not easily generated from C code.
89                  withCallingHandlers(.External2(C_parseRd, tcon, srcfile, "UTF-8",
90                                                 verbose, basename, fragment,
91                                                 warningCalls, macros, warndups),
92		       warning = function(w)
93			    if (grepl("unknown macro", conditionMessage(w)))
94				tryInvokeRestart("muffleWarning"))
95              else
96                  .External2(C_parseRd, tcon, srcfile, "UTF-8",
97                             verbose, basename, fragment, warningCalls,
98                             macros, warndups)
99    result <- expandDynamicFlags(result)
100    if (permissive)
101	permissify(result)
102    else
103        result
104}
105
106print.Rd <- function(x, deparse = FALSE, ...)
107{
108    cat(as.character.Rd(x, deparse = deparse), sep = "")
109    invisible(x)
110}
111
112as.character.Rd <- function(x, deparse = FALSE, ...)
113{
114    ZEROARG <- c("\\cr", "\\dots", "\\ldots", "\\R", "\\tab") # Only these cause trouble when {} is added
115    TWOARG <- c("\\section", "\\subsection", "\\item", "\\enc",
116                "\\method", "\\S3method", "\\S4method", "\\tabular",
117                "\\if", "\\href")
118    USERMACROS <- c("USERMACRO", "\\newcommand", "\\renewcommand")
119    EQN <- c("\\deqn", "\\eqn", "\\figure")
120    modes <- c(RLIKE = 1L, LATEXLIKE = 2L, VERBATIM = 3L, INOPTION = 4L, COMMENTMODE = 5L, UNKNOWNMODE = 6L)
121    tags  <- c(RCODE = 1L, TEXT      = 2L, VERB     = 3L,                COMMENT     = 5L, UNKNOWN     = 6L)
122    state <- c(braceDepth = 0L, inRString = 0L)
123    needBraces <- FALSE  # if next character is alphabetic, separate by braces.
124    inEqn <- 0L
125
126    pr <- function(x, quoteBraces) {
127        tag <- attr(x, "Rd_tag")
128        if (is.null(tag) || tag == "LIST") tag <- ""
129    	if (is.list(x)) {
130    	    savestate <- state
131    	    state <<- c(0L, 0L)
132    	    needBraces <<- FALSE
133    	    if (tag == "Rd") { # a whole file
134    	        result <- character()
135    	    	for (i in seq_along(x))
136                    result <- c(result, pr(x[[i]], quoteBraces))
137    	    } else if (startsWith(tag, "#")) {
138    	    	if (deparse) {
139    	    	    dep <- deparseRdElement(x[[1L]][[1L]],
140                                            c(state, modes["LATEXLIKE"],
141                                              inEqn,
142                                              as.integer(quoteBraces)))
143    	    	    result <- c(tag, dep[[1L]])
144    	    	} else
145    	    	    result <- c(tag, x[[1L]][[1L]])
146    	    	for (i in seq_along(x[[2L]]))
147                    result <- c(result, pr(x[[2L]][[i]], quoteBraces))
148    	    	result <- c(result, "#endif\n")
149    	    } else if (tag %in% ZEROARG) {
150    	    	result <- tag
151    	    	needBraces <<- TRUE
152    	    } else if (tag %in% TWOARG) {
153    	    	result <- tag
154    	    	for (i in seq_along(x))
155                    result <- c(result, pr(x[[i]], quoteBraces))
156    	    } else if (tag %in% EQN) {
157    	    	result <- tag
158    	    	inEqn <<- 1L
159    	    	result <- c(result, pr(x[[1L]], quoteBraces))
160    	    	inEqn <<- 0L
161    	    	if (length(x) > 1L)
162    	    	    result <- c(result, pr(x[[2L]], quoteBraces))
163    	    } else {
164    	    	result <- tag
165    	    	if (!is.null(option <- attr(x, "Rd_option")))
166    	    	    result <- c(result, "[", pr(option, quoteBraces), "]")
167    	    	result <- c(result, "{")
168    	    	for (i in seq_along(x))
169                    result <- c(result, pr(x[[i]], quoteBraces))
170    	    	result <- c(result, "}")
171    	    }
172    	    if (state[1L])  # If braces didn't match within the list, try again, quoting them
173    	    	result <- pr(x, TRUE)
174    	    state <<- savestate
175    	} else if (tag %in% USERMACROS) {
176    	    	result <- c()
177    	} else {
178    	    if (deparse) {
179    		dep <- deparseRdElement(as.character(x), c(state, tags[tag], inEqn, as.integer(quoteBraces)))
180    	    	result <- dep[[1L]]
181    	    	state <<- dep[[2L]][1L:2L]
182    	    } else {
183	        if (inherits(x, "Rd"))
184		    class(x) <- setdiff(class(x), "Rd") # Avoid infinite recursion from misuse (PR#16448)
185    	    	result <- as.character(x)
186	    }
187    	    if (needBraces) {
188    	    	if (grepl("^[[:alpha:]]", result)) result <- c("{}", result)
189    	    	needBraces <<- FALSE
190    	    }
191        }
192    	result
193    }
194    if (is.null(attr(x, "Rd_tag"))) attr(x, "Rd_tag") <- "Rd"
195    pr(x, quoteBraces = FALSE)
196}
197
198deparseRdElement <- function(element, state)
199    .Call(C_deparseRd, element, state)
200
201# Convert unknown tags into text displaying the tag with braces if necessary
202# This allows unknown LateX macros to be embedded in the text, and to be just passed
203# through.
204
205permissify <- function(Rd)
206{
207    tags <- RdTags(Rd)
208    oldclass <- class(Rd)
209    oldsrcref <- utils::getSrcref(Rd)
210    oldtag <- attr(Rd, "Rd_tag")
211    i <- 0
212    while (i < length(tags)) {
213        i <- i+1
214   	if (tags[i] == "UNKNOWN") {
215   	    Rd[[i]] <- tagged(Rd[[i]], "TEXT", utils::getSrcref(Rd[[i]]))
216            while (i < length(tags)) {
217		if (tags[i+1] == "LIST") {
218		    Rd <- c(Rd[seq_len(i)],
219                            list(tagged("{", "TEXT", utils::getSrcref(Rd[[i+1]]))),
220                            permissify(Rd[[i+1]]),
221                            list(tagged("}", "TEXT", utils::getSrcref(Rd[[i+1]]))),
222			    Rd[seq_along(Rd)[-seq_len(i+1)]])
223		    tags <- RdTags(Rd)
224		    i <- i+3
225		} else if (tags[i+1] == "TEXT" && grepl("^ *$", Rd[[i+1]]))
226		    i <- i + 1
227		else
228		    break
229            }
230        } else if (is.recursive(Rd[[i]]))
231            Rd[[i]] <- permissify(Rd[[i]])
232    }
233    class(Rd) <- oldclass
234    attr(Rd, "srcref") <- oldsrcref
235    attr(Rd, "Rd_tag") <- oldtag
236    Rd
237}
238