1
2# This is not actually used anywhere, we I'll leave it here.
3# It might be useful for testing improvements for the more complicated
4# parsers.
5
6parse_description <- function(path) {
7  path <- path.expand(path)
8  path <- encode_path(path)
9  ret <- .Call(pkgcache_parse_description, path)
10  ret2 <- gsub("\r", "", ret, fixed = TRUE)
11
12  # Fix encoding. If UTF-8 is declared, then we just mark it.
13  # Otherwise we convert it to UTF-8. Conversion might throw an error.
14  if ("Encoding" %in% names(ret2)) {
15    enc <- trimws(ret2[["Encoding"]])
16    if (enc == "UTF-8") {
17      Encoding(ret2) <- "UTF-8"
18    } else {
19      trs <- iconv(ret2, enc, "UTF-8")
20      ret2[] <- ifelse(is.na(trs), ret2, trs)
21    }
22  }
23  ret2
24}
25
26fix_encodings <- function(lst, col = "Encoding") {
27  if (! col %in% names(lst)) return(lst)
28  utf8 <- which(!is.na(lst[[col]]) & lst[[col]] == "UTF-8")
29  other <- which(!is.na(lst[[col]]) & lst[[col]] != "UTf-8")
30  unq <- unique(lst[[col]][other])
31  if (length(utf8)) {
32    for (i in seq_along(lst)) {
33      Encoding(lst[[i]][utf8]) <- "UTF-8"
34    }
35  }
36  if (length(unq)) {
37    for (u in unq) {
38      wh <- which(!is.na(lst[[col]]) & lst[[col]] == u)
39      for (i in seq_along(lst)) {
40        tryCatch({
41          trs <- iconv(lst[[i]][wh], u, "UTF-8")
42          lst[[i]][wh] <- ifelse(is.na(trs), lst[[i]][wh], trs)
43        }, error = function(e) NULL)
44      }
45    }
46  }
47  lst
48}
49
50#' Parse a repository metadata `PACAKGES*` file
51#'
52#' @details
53#' Non-existent, unreadable or corrupt `PACKAGES` files with trigger an
54#' error.
55#'
56#' `PACKAGES*` files do not usually declare an encoding, but nevertheless
57#' `parse_packages()` works correctly if they do.
58#'
59#' # Note
60#' `parse_packages()` cannot currently read files that have very many
61#' different fields (many columns in the result tibble). The current
62#' limit is 1000. Typical `PACKAGES` files contain less than 20 field
63#' types.
64#'
65#' @param path Path to `PACKAGES`. The file can be `gzip` compressed, with
66#'   extension `.gz`; `bzip2` compressed, with extension `.bz2` or `bzip2`;
67#'   or `xz` compressed with extension `xz`. It may also be a `PACKAGES.rds`
68#'   file, which will be read using [base::readRDS()]. Otherwise the file at
69#'   `path` is assumed to be uncompressed.
70#' @return A tibble, with all columns from the file at `path`.
71#'
72#' @export
73
74parse_packages <- function(path) {
75  stopifnot(
76    "`path` must be a character scalar" = is_string(path)
77  )
78  path <- path.expand(path)
79  path <- encode_path(path)
80
81  ext <- tools::file_ext(path)
82  if (ext == "rds") {
83    tab <- readRDS(path)
84
85  } else {
86    cmp <- .Call(pkgcache_read_raw, path)[[1]]
87    if (is.character(cmp)) {
88      stop(cmp)
89    }
90
91    if (ext == "gz") {
92      if (getRversion() >= "4.0.0") {
93        bts <- memDecompress(cmp, type = "gzip")
94      } else {
95        bts <- gzip_decompress(cmp)     # nocov
96      }
97    } else if (ext %in% c("bz2", "bzip2")) {
98      bts <- memDecompress(cmp, type = "bzip2")
99    } else if (ext == "xz") {
100      bts <- memDecompress(cmp, type = "xz")
101    } else {
102      bts <- cmp
103    }
104
105    tab <- .Call(pkgcache_parse_packages_raw, bts)
106    tab[] <- lapply(tab, function(x) {
107      x <- gsub("\r", "", fixed = TRUE, x)
108      empt <- is.na(x)
109      miss <- x == ""
110      x[empt] <- ""
111      x[miss] <- NA_character_
112      x
113    })
114
115    # this is rarely needed for PACKAGES files, included for completeness
116    tab <- fix_encodings(tab)
117  }
118
119  tbl <- tibble::as_tibble(tab)
120
121  tbl
122}
123
124#' List metadata of installed packages
125#'
126#' This function is similar to [utils::installed.packages()].
127#' See the differences below.
128#'
129#' @details
130#' Differences with [utils::installed.packages()]:
131#' * `parse_installed()` cannot subset the extracted fields. (But you can
132#'   subset the result.)
133#' * `parse_installed()` does not cache the results.
134#' * `parse_installed()` handles errors better. See Section 'Errors' below.
135#' #' * `parse_installed()` uses the `DESCRIPTION` files in the installed packages
136#'   instead of the `Meta/package.rds` files. This should not matter,
137#'   but because of a bug `Meta/package.rds` might contain the wrong
138#'   `Archs` field on multi-arch platforms.
139#' * `parse_installed()` reads _all_ fields from the `DESCRIPTION` files.
140#'   [utils::installed.packages()] only reads the specified fields.
141#' * `parse_installed()` converts its output to UTF-8 encoding, from the
142#'   encodings declared in the `DESCRIPTION` files.
143#' * `parse_installed()` is considerably faster.
144#'
145#' ## Encodings
146#'
147#' `parse_installed()` always returns its result in UTF-8 encoding.
148#' It uses the `Encoding` fields in the `DESCRIPTION` files to learn their
149#' encodings. `parse_installed()` does not check that an UTF-8 file has a
150#' valid encoding. If it fails to convert a string to UTF-8 from another
151#' declared encoding, then it leaves it as `"bytes"` encoded, without a
152#' warning.
153#'
154#' ## Errors
155#'
156#' pkgcache silently ignores files and directories inside the library
157#' directory.
158#'
159#' The result also omits broken package installations. These include
160#'
161#' * packages with invalid `DESCRIPTION` files, and
162#' * packages the current user have no access to.
163#'
164#' These errors are reported via a condition with class
165#' `pkgcache_broken_install`. The condition has an `errors` entry, which
166#' is a tibble with columns
167#'
168#' * `file`: path to the `DESCRIPTION` file of the broken package,
169#' * `error`: error message for this particular failure.
170#'
171#' If you intend to handle broken package installation, you need to catch
172#' this condition with `withCallingHandlers()`.
173#'
174#' @param library Character vector of library paths.
175#' @param priority If not `NULL` then it may be a `"base"` `"recommended"`
176#'   `NA` or a vector of these to select _base_ packages, _recommended_
177#'   packages or _other_ packages. (These are the official, CRAN supported
178#'   package priorities, but you may introduce others in non-CRAN packages.)
179#' @param lowercase Whether to convert keys in `DESCRIPTION` to lowercase.
180#' @param reencode Whether to re-encode strings in UTF-8, from the
181#'   encodings specified in the `DESCRIPTION` files. Re-encoding is
182#'   somewhat costly, and sometimes it is not important (e.g. when you only
183#'   want to extract the dependencies of the installed packages).
184#'
185#' @export
186
187parse_installed <- function(library = .libPaths(), priority = NULL,
188                            lowercase = FALSE, reencode = TRUE) {
189  stopifnot(
190    "`library` must be a character vector" = is.character(library),
191    "`priority` must be `NULL` or a character vector" =
192      is.null(priority) || is.character(priority) || identical(NA, priority),
193    "`library` cannot have length zero" = length(library) > 0,
194    "`lowercase` must be a boolean flag" = is_flag(lowercase)
195  )
196
197  # Merge multiple libraries
198  if (length(library) > 1) {
199    lsts <- lapply(
200      library,
201      parse_installed,
202      priority = priority,
203      lowercase = lowercase,
204      reencode = reencode
205    )
206    return(rbind_expand(.list = lsts))
207  }
208
209  # Handle ~ in path name
210  library <- path.expand(library)
211
212  # NOTE: a package is a directory with `DESCRIPTION`, other
213  #       files and directories are ignored.
214  # TODO: replace dir() with something that errors if library does not
215  #       exist or we cannot get the list of directories.
216  dscs <- file.path(library, dir(library), "DESCRIPTION")
217  dscs <- dscs[file.exists(dscs)]
218
219  dscs <- encode_path(dscs)
220  prs <- .Call(pkgcache_parse_descriptions, dscs, lowercase)
221  tab <- prs[[1]]
222
223  tab[] <- lapply(tab, function(x) {
224    x <- gsub("\r", "", x, fixed = TRUE)
225    empt <- is.na(x)
226    miss <- x == ""
227    x[empt] <- ""
228    x[miss] <- NA_character_
229    x
230  })
231
232  tbl <- tibble::as_tibble(tab)
233  if (lowercase) {
234    tbl$libpath <- library
235  } else {
236    tbl$LibPath <- library
237  }
238
239  # Filter out errors
240  if (prs[[3]]) {
241    bad <- prs[[2]] != ""
242    tbl <- tbl[!bad, ]
243    cnd <- new_pkgcache_warning(
244      "Cannot read DESCRIPTION files:\n", paste0("* ", prs[[2]][bad], "\n"),
245      class = "pkgcache_broken_install",
246      data = list(errors = tibble(file = dscs[bad], error = prs[[2]][bad]))
247    )
248    withRestarts(
249      muffleWarning = function() NULL,
250      signalCondition(cnd)
251    )
252  }
253
254  # filter for priority
255  prname <- if (lowercase) "priority" else "Priority"
256  if (!is.null(priority) && prname %in% names(tbl)) {
257    keep <- if (anyNA(priority)) {
258      is.na(tbl[[prname]])
259    } else {
260      FALSE
261    }
262    priority <- na_omit(priority)
263    keep <- keep | tbl[[prname]] %in% priority
264    tbl <- tbl[keep, ]
265  }
266
267  if (reencode) tbl <- fix_encodings(tbl)
268
269  tbl
270}
271