1
2cmc__data <- new.env(parent = emptyenv())
3
4#' Metadata cache for a CRAN-like repository
5#'
6#' This is an R6 class that implements the metadata cache of a CRAN-like
7#' repository. For a higher level interface, see the [meta_cache_list()],
8#' [meta_cache_deps()], [meta_cache_revdeps()] and [meta_cache_update()]
9#' functions.
10#'
11#' The cache has several layers:
12#' * The data is stored inside the `cranlike_metadata_cache` object.
13#' * It is also stored as an RDS file, in the session temporary directory.
14#'   This ensures that the same data is used for all queries of a
15#'   `cranlike_metadata_cache` object.
16#' * It is stored in an RDS file in the user's cache directory.
17#' * The downloaded raw `PACKAGES*` files are cached, together with HTTP
18#'   ETags, to minimize downloads.
19#'
20#' It has a synchronous and an asynchronous API.
21#'
22#' @section Usage:
23#' ```
24#' cmc <- cranlike_metadata_cache$new(
25#'   primary_path = NULL, replica_path = tempfile(),
26#'   platforms = default_platforms(), r_version = getRversion(),
27#'   bioc = TRUE, cran_mirror = default_cran_mirror(),
28#'   repos = getOption("repos"),
29#'   update_after = as.difftime(7, units = "days"))
30#'
31#' cmc$list(packages = NULL)
32#' cmc$async_list(packages = NULL)
33#'
34#' cmc$deps(packages, dependencies = NA, recursive = TRUE)
35#' cmc$async_deps(packages, dependencies = NA, recursive = TRUE)
36#'
37#' cmc$revdeps(packages, dependencies = NA, recursive = TRUE)
38#' cmc$async_revdeps(packages, dependencies = NA, recursive = TRUE)
39#'
40#' cmc$update()
41#' cmc$async_update()
42#' cmc$check_update()
43#' cmc$asnyc_check_update()
44#'
45#' cmc$summary()
46#'
47#' cmc$cleanup(force = FALSE)
48#' ```
49#'
50#' @section Arguments:
51#' * `primary_path`: Path of the primary, user level cache. Defaults to
52#'   the user level cache directory of the machine.
53#' * `replica_path`: Path of the replica. Defaults to a temporary directory
54#'   within the session temporary directory.
55#' * `platforms`: see [default_platforms()] for possible values.
56#' * `r_version`: R version to create the cache for.
57#' * `bioc`: Whether to include BioConductor packages.
58#' * `cran_mirror`: CRAN mirror to use, this takes precedence over `repos`.
59#' * `repos`: Repositories to use.
60#' * `update_after`: `difftime` object. Automatically update the cache if
61#'   it gets older than this. Set it to `Inf` to avoid updates. Defaults
62#'   to seven days.
63#' * `packages`: Packages to query, character vector.
64#' * `dependencies`: Which kind of dependencies to include. Works the same
65#'   way as the `dependencies` argument of [utils::install.packages()].
66#' * `recursive`: Whether to include recursive dependencies.
67#' * `force`: Whether to force cleanup without asking the user.
68#'
69#' @section Details:
70#'
71#' `cranlike_metadata_cache$new()` creates a new cache object. Creation
72#' does not trigger the population of the cache. It is only populated on
73#' demand, when queries are executed against it. In your package, you may
74#' want to create a cache instance in the `.onLoad()` function of the
75#' package, and store it in the package namespace. As this is a cheap
76#' operation, the package will still load fast, and then the package code
77#' can refer to the common cache object.
78#'
79#' `cmc$list()` lists all (or the specified) packages in the cache.
80#' It returns a tibble, see the list of columns below.
81#'
82#' `cmc$async_list()` is similar, but it is asynchronous, it returns a
83#' `deferred` object.
84#'
85#' `cmc$deps()` returns a tibble, with the (potentially recursive)
86#' dependencies of `packages`.
87#'
88#' `cmc$async_deps()` is the same, but it is asynchronous, it
89#' returns a `deferred` object.
90#'
91#' `cmc$revdeps()` returns a tibble, with the (potentially recursive)
92#' reverse dependencies of `packages`.
93#'
94#' `cmc$async_revdeps()` does the same, asynchronously, it returns an
95#' `deferred` object.
96#'
97#' `cmc$update()` updates the the metadata (as needed) in the cache,
98#' and then returns a tibble with all packages, invisibly.
99#'
100#' `cmc$async_update()` is similar, but it is asynchronous.
101#'
102#' `cmc$check_update()` checks if the metadata is current, and if it is
103#' not, it updates it.
104#'
105#' `cmc$async_check_update()` is similar, but it is asynchronous.
106#'
107#' `cmc$summary()` lists metadata about the cache, including its
108#' location and size.
109#'
110#' `cmc$cleanup()` deletes the cache files from the disk, and also from
111#' memory.
112#'
113#' @section Columns:
114#' The metadata tibble contains all available versions (i.e. sources and
115#' binaries) for all packages. It usually has the following columns,
116#' some might be missing on some platforms.
117#' * `package`: Package name.
118#' * `title`: Package title.
119#' * `version`: Package version.
120#' * `depends`: `Depends` field from `DESCRIPTION`, or `NA_character_`.
121#' * `suggests`: `Suggests` field from `DESCRIPTION`, or `NA_character_`.
122#' * `built`:  `Built` field from `DESCIPTION`, if a binary package,
123#'   or `NA_character_`.
124#' * `imports`: `Imports` field from `DESCRIPTION`, or `NA_character_`.
125#' * `archs`: `Archs` entries from `PACKAGES` files. Might be missing.
126#' * `repodir`: The directory of the file, inside the repository.
127#' * `platform`: This is a character vector. See [default_platforms()] for
128#'    more about platform names. In practice each value of the `platform`
129#'    column is either
130#'    * `"source"` for source packages,
131#'    * a platform string, e.g. `x86_64-apple-darwin17.0` for macOS
132#'      packages compatible with macOS High Sierra or newer.
133#' * `needscompilation`: Whether the package needs compilation.
134#' * `type`: `bioc` or `cran`  currently.
135#' * `target`: The path of the package file inside the repository.
136#' * `mirror`: URL of the CRAN/BioC mirror.
137#' * `sources`: List column with URLs to one or more possible locations
138#'   of the package file. For source CRAN packages, it contains URLs to
139#'   the `Archive` directory as well, in case the package has been
140#'   archived since the metadata was cached.
141#' * `filesize`: Size of the file, if known, in bytes, or `NA_integer_`.
142#' * `sha256`: The SHA256 hash of the file, if known, or `NA_character_`.
143#' * `deps`: All package dependencies, in a tibble.
144#' * `license`: Package license, might be `NA` for binary packages.
145#' * `linkingto`: `LinkingTo` field from `DESCRIPTION`, or `NA_character_`.
146#' * `enhances`: `Enhances` field from `DESCRIPTION`, or `NA_character_`.
147#' * `os_type`: `unix` or `windows` for OS specific packages. Usually `NA`.
148#' * `priority`: "optional", "recommended" or `NA`. (Base packages are
149#'   normally not included in the list, so "base" should not appear here.)
150#' * `md5sum`: MD5 sum, if available, may be `NA`.
151#' * `sysreqs`: For CRAN packages, the `SystemRequirements` field, the
152#'   required system libraries or other software for the package. For
153#'   non-CRAN packages it is `NA`.
154#' * `published`: The time the package was published at, in GMT,
155#'   `POSIXct` class.
156#'
157#' The tibble contains some extra columns as well, these are for internal
158#' use only.
159#'
160#' @export
161#' @examplesIf pkgcache:::run_examples()
162#' dir.create(cache_path <- tempfile())
163#' cmc <- cranlike_metadata_cache$new(cache_path, bioc = FALSE)
164#' cmc$list()
165#' cmc$list("pkgconfig")
166#' cmc$deps("pkgconfig")
167#' cmc$revdeps("pkgconfig", recursive = FALSE)
168
169cranlike_metadata_cache <- R6Class(
170  "cranlike_metadata_cache",
171
172  public = list(
173    initialize = function(primary_path = NULL,
174                          replica_path = tempfile(),
175                          platforms = default_platforms(),
176                          r_version = getRversion(), bioc = TRUE,
177                          cran_mirror = default_cran_mirror(),
178                          repos = getOption("repos"),
179                          update_after = as.difftime(7, units = "days"))
180      cmc_init(self, private,  primary_path, replica_path, platforms,
181               r_version, bioc, cran_mirror, repos, update_after),
182
183    deps = function(packages, dependencies = NA, recursive = TRUE)
184      synchronise(self$async_deps(packages, dependencies, recursive)),
185    async_deps = function(packages, dependencies = NA, recursive = TRUE)
186      cmc_async_deps(self, private, packages, dependencies, recursive),
187
188    revdeps = function(packages, dependencies = NA, recursive = TRUE)
189      synchronise(self$async_revdeps(packages, dependencies, recursive)),
190    async_revdeps = function(packages, dependencies = NA, recursive = TRUE)
191      cmc_async_revdeps(self, private, packages, dependencies, recursive),
192
193    list = function(packages = NULL)
194      synchronise(self$async_list(packages)),
195    async_list = function(packages = NULL)
196      cmc_async_list(self, private, packages),
197
198    update = function()
199      synchronise(self$async_update()),
200    async_update = function()
201      cmc_async_update(self, private),
202
203    check_update = function()
204      synchronise(self$async_check_update()),
205    async_check_update = function()
206      cmc_async_check_update(self, private),
207
208    summary = function()
209      cmc_summary(self, private),
210
211    cleanup = function(force = FALSE)
212      cmc_cleanup(self, private, force)
213  ),
214
215  private = list(
216    get_cache_files = function(which = c("primary", "replica"))
217      cmc__get_cache_files(self, private, match.arg(which)),
218
219    async_ensure_cache = function(max_age = private$update_after)
220      cmc__async_ensure_cache(self, private, max_age),
221
222    get_current_data = function(max_age)
223      cmc__get_current_data(self, private, max_age),
224    get_memory_cache = function(max_age)
225      cmc__get_memory_cache(self, private, max_age),
226    load_replica_rds = function(max_age)
227      cmc__load_replica_rds(self, private, max_age),
228    load_primary_rds = function(max_age)
229      cmc__load_primary_rds(self, private, max_age),
230    load_primary_pkgs = function(max_age)
231      cmc__load_primary_pkgs(self, private, max_age),
232
233    update_replica_pkgs = function()
234      cmc__update_replica_pkgs(self, private),
235    update_replica_rds = function(alert = TRUE)
236      cmc__update_replica_rds(self, private, alert),
237    update_primary = function(rds = TRUE, packages = TRUE, lock = TRUE)
238      cmc__update_primary(self, private, rds, packages, lock),
239    update_memory_cache = function()
240      cmc__update_memory_cache(self, private),
241
242    copy_to_replica = function(rds = TRUE, pkgs = FALSE, etags = FALSE)
243      cmc__copy_to_replica(self, private, rds, pkgs, etags),
244
245    ## We use this to make sure that different versions of pkgcache can
246    ## share the same metadata cache directory. It is used to calculate
247    ## the hash of the cached RDS file.
248    cache_version = "3",
249
250    data = NULL,
251    data_time = NULL,
252    data_messaged = NULL,
253
254    update_deferred = NULL,
255    chk_update_deferred = NULL,
256
257    primary_path = NULL,
258    replica_path = NULL,
259    platforms = NULL,
260    r_version = NULL,
261    bioc = NULL,
262    repos = NULL,
263    update_after = NULL,
264    dirs = NULL,
265    lock_timeout = 10000
266  )
267)
268
269#' @importFrom filelock lock unlock
270
271cmc_init <- function(self, private, primary_path, replica_path, platforms,
272                     r_version, bioc, cran_mirror, repos, update_after) {
273
274  "!!DEBUG Init metadata cache in '`replica_path`'"
275  r_version <- as.character(r_version)
276  private$primary_path <- primary_path %||% get_user_cache_dir()$root
277  private$replica_path <- replica_path
278  private$platforms <- platforms
279  private$r_version <- get_minor_r_version(r_version)
280  private$bioc <- bioc
281  private$repos <- cmc__get_repos(repos, bioc, cran_mirror, r_version)
282  private$update_after <- update_after
283  private$dirs <- get_all_package_dirs(platforms, r_version)
284  invisible(self)
285}
286
287cmc_async_deps <- function(self, private, packages, dependencies,
288                           recursive) {
289  assert_that(
290    is_character(packages),
291    is_dependencies(dependencies),
292    is_flag(recursive))
293
294  "!!DEBUG Getting deps"
295  private$async_ensure_cache()$
296    then(~ extract_deps(., packages, dependencies, recursive))
297}
298
299cmc_async_revdeps <- function(self, private, packages, dependencies,
300                              recursive) {
301  assert_that(
302    is_character(packages),
303    is_dependencies(dependencies),
304    is_flag(recursive))
305
306  "!!DEBUG Getting revdeps"
307  private$async_ensure_cache()$
308    then(~ extract_revdeps(., packages, dependencies, recursive))
309}
310
311cmc_async_list <- function(self, private, packages) {
312  assert_that(is.null(packages) || is_character(packages))
313
314  "!!DEBUG Listing packages"
315  private$async_ensure_cache()$
316    then(function(x) {
317      if (is.null(packages)) x$pkgs else x$pkgs[x$pkgs$package %in% packages,]
318    })
319}
320
321cmc_async_update <- function(self, private) {
322  self; private
323  if (!is.null(private$update_deferred)) return(private$update_deferred)
324
325  private$update_deferred <- async(private$update_replica_pkgs)()$
326    then(~ private$update_replica_rds())$
327    then(~ private$update_primary())$
328    then(~ private$data)$
329    catch(error = function(err) {
330      err$message <- msg_wrap(
331        conditionMessage(err), "\n\n",
332        "Could not load or update metadata cache. If you think your local ",
333        "cache is broken, try deleting it with `meta_cache_cleanup()`, or ",
334        "the `$cleanup()` method.")
335      stop(err)
336    })$
337    finally(function() private$update_deferred <- NULL)$
338    share()
339}
340
341cmc_async_check_update <- function(self, private) {
342  self; private
343
344  if (!is.null(private$update_deferred)) return(private$update_deferred)
345  if (!is.null(private$chk_update_deferred)) return(private$chk_update_deferred)
346
347  private$chk_update_deferred <- async(private$update_replica_pkgs)()$
348    then(function(ret) {
349      ## Some might be NULL, if failure was allowed and indeed it happened.
350      ## For these we just pretend that they did not change, so they do
351      ## not trigger an update. The metadata RDS builder is robust for
352      ## these files to be empty or non-existing.
353      stat <- viapply(ret, function(x) x$response$status_code %||% 304L)
354      rep_files <- private$get_cache_files("replica")
355      pkg_times <- file_get_time(rep_files$pkgs$path)
356      if (! file.exists(rep_files$rds) ||
357          any(file_get_time(rep_files$rds) < pkg_times) ||
358          any(stat < 300)) {
359        private$update_replica_rds(alert = FALSE)
360        private$update_primary()
361        private$data
362
363      } else {
364        private$async_ensure_cache()
365      }
366    })$
367    finally(function() private$chk_update_deferred <- NULL)$
368    share()
369}
370
371cmc_summary <- function(self, private) {
372  dirs <- private$get_cache_files("primary")
373  pgz <- dir(dirs$meta, recursive = TRUE, pattern = "^PACKAGES\\.gz$",
374             full.names = TRUE)
375  all <- dir(dirs$meta, recursive = TRUE, full.names = TRUE)
376  list(
377    cachepath = dirs$meta,
378    lockfile = dirs$lock,
379    current_rds = dirs$rds,
380    raw_files = pgz,
381    rds_files = dir(dirs$meta, pattern = "\\.rds$", full.names = TRUE),
382    size = sum(file.size(all))
383  )
384}
385
386#' @importFrom cli cli_alert_info
387
388cmc_cleanup <- function(self, private, force) {
389  if (!force && !interactive()) {
390    stop("Not cleaning up cache, please specify `force = TRUE`")
391  }
392  cache_dir <- private$get_cache_files("primary")$meta
393  if (!force) {
394    msg <- glue::glue(
395      "Are you sure you want to clean up the cache in `{cache_dir}` (y/N)? ")
396    ans <- readline(msg)
397    if (! ans %in% c("y", "Y")) stop("Aborted")
398  }
399
400  local_cache_dir <- private$get_cache_files("replica")
401  unlink(local_cache_dir, recursive = TRUE, force = TRUE)
402  private$data <- NULL
403  private$data_messaged <- NULL
404  cli_alert_info("Cleaning up cache directory {.path {cache_dir}}.")
405  unlink(cache_dir, recursive = TRUE, force = TRUE)
406}
407
408#' @importFrom digest digest
409#' @importFrom utils URLencode
410
411repo_encode <- function(repos) {
412  paste0(
413    vcapply(repos$name, URLencode, reserved = TRUE), "-",
414    substr(vcapply(repos$url, digest), 1, 10)
415  )
416}
417
418cran_metadata_url <- function() {
419  Sys.getenv(
420    "R_PKG_CRAN_METADATA_URL",
421    "https://cran.r-pkg.org/metadata/")
422}
423
424cmc__get_cache_files <- function(self, private, which) {
425  root <- private[[paste0(which, "_path")]]
426
427  repo_hash <- digest(list(repos = private$repos, dirs = private$dirs,
428                           version = private$cache_version))
429
430  str_platforms <- paste(private$platforms, collapse = "+")
431  rds_file <- paste0("pkgs-", substr(repo_hash, 1, 10), ".rds")
432
433  repo_enc <- rep(repo_encode(private$repos), each = nrow(private$dirs))
434  pkgs_dirs <- rep(private$dirs$contriburl, nrow(private$repos))
435  pkgs_files <- file.path(pkgs_dirs, "PACKAGES.gz")
436  pkgs_files2 <- file.path(pkgs_dirs, "PACKAGES")
437  mirror <- rep(private$repos$url, each = nrow(private$dirs))
438  name <- tolower(rep(private$repos$name, each = nrow(private$dirs)))
439  type <- rep(private$repos$type, each = nrow(private$dirs))
440  r_version <- rep(private$dirs$rversion, nrow(private$repos))
441  bioc_version <- rep(private$repos$bioc_version, each = nrow(private$dirs))
442
443  pkg_path <- file.path(root, "_metadata", repo_enc, pkgs_files)
444  meta_path <- ifelse(
445    type == "cran" | name == "rspm",
446    file.path(root, "_metadata", repo_enc, pkgs_dirs, "METADATA2.gz"),
447    NA_character_)
448  meta_etag <- ifelse(
449    !is.na(meta_path), paste0(meta_path, ".etag"), NA_character_)
450  meta_url <- ifelse(
451    !is.na(meta_path),
452    paste0(cran_metadata_url(), pkgs_dirs, "/METADATA2.gz"),
453    NA_character_)
454
455  list(
456    root = root,
457    meta = file.path(root, "_metadata"),
458    lock = file.path(root, "_metadata.lock"),
459    rds  = file.path(root, "_metadata", rds_file),
460    pkgs = tibble::tibble(
461      path = pkg_path,
462      etag = file.path(root, "_metadata", repo_enc, paste0(pkgs_files, ".etag")),
463      basedir = pkgs_dirs,
464      base = pkgs_files,
465      mirror = mirror,
466      url = paste0(mirror, "/", pkgs_files),
467      fallback_url = paste0(mirror, "/", pkgs_files2),
468      platform = rep(private$dirs$platform, nrow(private$repos)),
469      type = type,
470      r_version = r_version,
471      bioc_version = bioc_version,
472      meta_path = meta_path,
473      meta_etag = meta_etag,
474      meta_url = meta_url
475    )
476  )
477}
478
479#' Load the cache, asynchronously, with as little work as possible
480#'
481#' 1. If it is already loaded, and fresh return it.
482#' 2. Otherwise try the replica RDS.
483#' 3. Otherwise try the primary RDS.
484#' 4. Otherwise try the primary PACKAGES files.
485#' 5. Otherwise update the replica PACKAGES files,
486#'    the replica RDS, and then also the primary PACKAGES and RDS.
487#'
488#' @param self self
489#' @param private private self
490#' @param max_age Maximum age allowed to consider the data current.
491#' @return Metadata.
492#' @keywords internal
493
494cmc__async_ensure_cache <- function(self, private, max_age) {
495  max_age
496
497  r <- try_catch_null(private$get_current_data(max_age)) %||%
498    try_catch_null(private$get_memory_cache(max_age)) %||%
499    try_catch_null(private$load_replica_rds(max_age)) %||%
500    try_catch_null(private$load_primary_rds(max_age)) %||%
501    try_catch_null(private$load_primary_pkgs(max_age))
502
503  if (is.null(r)) {
504    self$async_update()
505  } else {
506    async_constant(r)
507  }
508}
509
510cmc__get_current_data <- function(self, private, max_age) {
511  "!!DEBUG Get current data?"
512  if (is.null(private$data)) stop("No data loaded")
513  if (is.null(private$data_time) ||
514      Sys.time() - private$data_time > max_age) {
515    stop("Loaded data outdated")
516  }
517
518  "!!DEBUG Got current data!"
519  if (! isTRUE(private$data_messaged)) {
520    private$data_messaged <- TRUE
521  }
522  private$data
523}
524
525cmc__get_memory_cache  <- function(self, private, max_age) {
526  "!!DEBUG Get from memory cache?"
527  rds <- private$get_cache_files("primary")$rds
528  hit <- cmc__data[[rds]]
529  if (is.null(hit)) stop("Not in the memory cache")
530  if (is.null(hit$data_time) || Sys.time() - hit$data_time > max_age) {
531    stop("Memory cache outdated")
532  }
533  private$data <- hit$data
534  private$data_time <- hit$data_time
535  private$data_messaged <- NULL
536
537  private$data
538}
539
540#' Try to load the package metadata asynchronously, from the replica RDS
541#'
542#' If the replica has the RDS data, it is loaded and returned.
543#' Otherwise throws an error.
544#'
545#' @param self Self.
546#' @param private Private self.
547#' @param max_age Maximum age allowed for the RDS file to be considered
548#'   as current.
549#' @return The metadata.
550#' @keywords internal
551#' @importFrom cli cli_process_start cli_process_done
552
553cmc__load_replica_rds <- function(self, private, max_age) {
554  "!!DEBUG Load replica RDS?"
555  rds <- private$get_cache_files("replica")$rds
556  if (!file.exists(rds)) stop("No replica RDS file in cache")
557
558  time <- file_get_time(rds)
559  if (Sys.time() - time > max_age) stop("Replica RDS cache file outdated")
560
561  sts <- cli_process_start("Loading metadata database")
562  private$data <- readRDS(rds)
563  private$data_time <- time
564  private$data_messaged <- NULL
565  "!!DEBUG Loaded replica RDS!"
566  private$update_memory_cache()
567  cli_process_done(sts)
568
569  private$data
570}
571
572#' Load the metadata from the primary cache's RDS file
573#'
574#' If it exists and current, then the replica RDS is updated to it as well,
575#' and the data is returned. Otherwise throws an error.
576#'
577#' @inheritParams cmc__load_replica_rds
578#' @return Metadata.
579#' @keywords internal
580#' @importFrom cli cli_process_start cli_process_done
581
582cmc__load_primary_rds <- function(self, private, max_age) {
583  "!!DEBUG Load primary RDS?"
584  pri_files <- private$get_cache_files("primary")
585  rep_files <- private$get_cache_files("replica")
586
587  mkdirp(dirname(pri_files$lock))
588  l <- lock(pri_files$lock, exclusive = FALSE, private$lock_timeout)
589  if (is.null(l)) stop("Cannot acquire lock to copy RDS")
590  on.exit(unlock(l), add = TRUE)
591
592  if (!file.exists(pri_files$rds)) stop("No primary RDS file in cache")
593  time <- file_get_time(pri_files$rds)
594  if (Sys.time() - time > max_age) stop("Primary RDS cache file outdated")
595
596  ## Metadata files might be missing or outdated, that's ok (?)
597  pkgs_times <- file_get_time(pri_files$pkgs$path)
598  if (any(is.na(pkgs_times)) || any(pkgs_times >= time)) {
599    unlink(pri_files$rds)
600    stop("Primary PACKAGES missing or newer than replica RDS, removing")
601  }
602
603  sts <- cli_process_start("Loading metadata database")
604  file_copy_with_time(pri_files$rds, rep_files$rds)
605  unlock(l)
606
607  private$data <- readRDS(rep_files$rds)
608  private$data_time <- time
609  private$data_messaged <- NULL
610
611  private$update_memory_cache()
612  cli_process_done(sts)
613
614  private$data
615}
616
617#' Load metadata from the primary cache's PACKAGES files
618#'
619#' If they are not available, or outdated, it throws an error.
620#' Otherwise they are copied to the replica cache, and then used
621#' to create the RDS file. The RDS file is then written back to the
622#' primary cache and also loaded.
623#'
624#' @param self self
625#' @param private private self
626#' @param max_age Max age to consider the files current.
627#' @return Metadata.
628#' @keywords internal
629#' @importFrom cli cli_process_start cli_process_done
630
631cmc__load_primary_pkgs <- function(self, private, max_age) {
632  "!!DEBUG Load replica PACKAGES*?"
633  pri_files <- private$get_cache_files("primary")
634  rep_files <- private$get_cache_files("replica")
635
636  ## Lock
637  mkdirp(dirname(pri_files$lock))
638  l <- lock(pri_files$lock, exclusive = FALSE, private$lock_timeout)
639  if (is.null(l)) stop("Cannot acquire lock to copy PACKAGES files")
640  on.exit(unlock(l), add = TRUE)
641
642  ## Check if PACKAGES exist and current. It is OK if metadata is missing
643  pkg_files <- pri_files$pkgs$path
644  if (!all(file.exists(pkg_files))) {
645    stop("Some primary PACKAGES files don't exist")
646  }
647  time <- file_get_time(pkg_files)
648  if (any(Sys.time() - time > max_age)) {
649    stop("Some primary PACKAGES files are outdated")
650  }
651
652  ## Copy to replica, if we cannot copy the etags, that's ok
653  sts <- cli_process_start("Loading metadata database")
654  private$copy_to_replica(rds = FALSE, pkgs = TRUE, etags = TRUE)
655
656  ## Update RDS in replica, this also loads it
657  private$update_replica_rds(alert = FALSE)
658
659  ## Update primary, but not the PACKAGES
660  private$update_primary(rds = TRUE, packages = FALSE, lock = FALSE)
661  cli_process_done(sts)
662
663  private$data
664}
665
666#' Update the PACKAGES files in the replica cache
667#'
668#' I.e. download them, if they have changed.
669#'
670#' @param self self
671#' @param private private self
672#' @keywords internal
673
674cmc__update_replica_pkgs <- function(self, private) {
675  "!!DEBUG Update replica PACKAGES"
676  tryCatch(
677    private$copy_to_replica(rds = TRUE, pkgs = TRUE, etags = TRUE),
678    error = function(e) e)
679
680  rep_files <- private$get_cache_files("replica")
681  pkgs <- rep_files$pkgs
682
683  meta <- !is.na(pkgs$meta_url)
684  dls <- data.frame(
685    stringsAsFactors = FALSE,
686    url = c(pkgs$url, pkgs$meta_url[meta]),
687    fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta))),
688    path = c(pkgs$path, pkgs$meta_path[meta]),
689    etag = c(pkgs$etag, pkgs$meta_etag[meta]),
690    timeout = rep(c(200, 100), c(nrow(pkgs), sum(meta))),
691    mayfail = TRUE
692  )
693
694  download_files(dls)$
695    then(function(result) {
696      missing_pkgs_note(pkgs, result)
697      result
698    })
699}
700
701# E.g. "R 4.1 macos packages are missing from CRAN and Bioconductor"
702
703missing_pkgs_note <- function(pkgs, result) {
704  bad <- vlapply(result[seq_len(nrow(pkgs))], inherits, "error")
705  if (!any(bad)) return()
706
707  repo_name <- function(type, url) {
708    if (type == "cran") return("CRAN")
709    if (type == "bioc") return("Bioconductor")
710    sub("^https?://([^/]*).*$", "\\1", url)
711  }
712
713  msgs <- lapply(which(bad), function(i) {
714    list(
715      paste0(
716        if (pkgs$r_version[i] != "*") paste0("R ", pkgs$r_version[i], " "),
717        pkgs$platform[i]
718      ),
719      repo_name(pkgs$type[i], pkgs$mirror[i])
720    )
721  })
722
723  what <- vcapply(msgs, "[[", 1)
724  where <- vcapply(msgs, "[[", 2)
725  for (wt in unique(what)) {
726    wh <- unique(where[what == wt])
727    cli_alert_info("{wt} packages are missing from {wh}")
728  }
729}
730
731#' Update the replica RDS from the PACKAGES files
732#'
733#' Also loads it afterwards.
734#'
735#' @param self self
736#' @param private private self
737#' @param alert whether to show message about the update
738#' @keywords internal
739#' @importFrom cli cli_process_start cli_process_done
740
741cmc__update_replica_rds <- function(self, private, alert) {
742  "!!DEBUG Update replica RDS"
743  if (alert) sts <- cli_process_start("Updating metadata database")
744  rep_files <- private$get_cache_files("replica")
745
746  data_list <- lapply_rows(
747    rep_files$pkgs,
748    function(r) {
749      rversion <- if (r$platform == "source") "*" else private$r_version
750      tryCatch(
751        read_packages_file(r$path, mirror = r$mirror,
752                           repodir = r$basedir, platform = r$platform,
753                           rversion = rversion, type = r$type,
754                           meta_path = r$meta_path),
755        error = function(x) {
756          message()
757          warning(msg_wrap(
758            "Cannot read metadata information from `", r$path, "`. ",
759            "The file is corrupt. Try deleting the metadata cache with ",
760            "`pkgcache::meta_cache_cleanup()` or the `$cleanup()` method"),
761            immediate. = TRUE)
762          NULL
763        }
764      )
765    })
766
767  data_list <- data_list[!vlapply(data_list, is.null)]
768
769  if (length(data_list) == 0) stop("No metadata available")
770
771  private$data <- merge_packages_data(.list = data_list)
772  saveRDS(private$data, file = rep_files$rds, version = 2)
773  private$data_time <- file_get_time(rep_files$rds)
774  private$data_messaged <- NULL
775
776  private$update_memory_cache()
777
778  if (alert) cli_process_done(sts)
779  private$data
780}
781
782#' Update the primary cache from the replica cache
783#'
784#' @param self self
785#' @param private private self
786#' @param rds Whether to update the RDS file.
787#' @param packages Whether to update the PACKAGES files (+ ETag files).
788#' @return Nothing.
789#'
790#' @keywords internal
791
792cmc__update_primary <- function(self, private, rds, packages, lock) {
793
794  "!!DEBUG Updata primary cache"
795  if (!rds && !packages) return()
796
797  pri_files <- private$get_cache_files("primary")
798  rep_files <- private$get_cache_files("replica")
799
800  if (lock) {
801    mkdirp(dirname(pri_files$lock))
802    l <- lock(pri_files$lock, exclusive = TRUE, private$lock_timeout)
803    if (is.null(l)) stop("Cannot acquire lock to update primary cache")
804    on.exit(unlock(l), add = TRUE)
805  }
806
807  if (rds) {
808    file_copy_with_time(rep_files$rds, pri_files$rds)
809  }
810  if (packages) {
811    file_copy_with_time(rep_files$pkgs$path, pri_files$pkgs$path)
812    file_copy_with_time(rep_files$pkgs$etag, pri_files$pkgs$etag)
813    file_copy_with_time(na_omit(rep_files$pkgs$meta_path),
814                        na_omit(pri_files$pkgs$meta_path))
815    file_copy_with_time(na_omit(rep_files$pkgs$meta_etag),
816                        na_omit(pri_files$pkgs$meta_etag))
817  }
818  invisible()
819}
820
821cmc__update_memory_cache <- function(self, private) {
822  rds <- private$get_cache_files("primary")$rds
823  cmc__data[[rds]] <- list(data = private$data, data_time = private$data_time)
824}
825
826cmc__copy_to_replica <- function(self, private, rds, pkgs, etags) {
827  pri_files <- private$get_cache_files("primary")
828  rep_files <- private$get_cache_files("replica")
829
830  mkdirp(dirname(pri_files$lock))
831  l <- lock(pri_files$lock, exclusive = FALSE, private$lock_timeout)
832  if (is.null(l)) stop("Cannot acquire lock to copy primary cache")
833  on.exit(unlock(l), add = TRUE)
834
835  if (rds) {
836    file_copy_with_time(pri_files$rds, rep_files$rds)
837  }
838
839  if (pkgs) {
840    file_copy_with_time(pri_files$pkgs$path, rep_files$pkgs$path)
841    file_copy_with_time(na_omit(pri_files$pkgs$meta_path),
842                        na_omit(rep_files$pkgs$meta_path))
843  }
844  if (etags) {
845    file_copy_with_time(pri_files$pkgs$etag, rep_files$pkgs$etag)
846    file_copy_with_time(na_omit(pri_files$pkgs$meta_etag),
847                        na_omit(rep_files$pkgs$meta_etag))
848  }
849}
850
851extract_deps <- function(pkgs, packages, dependencies, recursive) {
852
853  realdep <- interpret_dependencies(dependencies)
854  dep <- tolower(realdep$direct)
855
856  new <- packages
857  repeat {
858    new <- setdiff(
859      pkgs$deps$package[pkgs$deps$upstream %in% new &
860                        pkgs$deps$type %in% dep],
861      packages)
862    if (!length(new)) break
863    packages <- c(packages, new)
864    if (!recursive) break
865    dep <- tolower(realdep$indirect)
866  }
867
868  packages <- setdiff(packages, "R")
869  res <- pkgs$pkgs[pkgs$pkgs$package %in% packages, ]
870
871  base <- intersect(packages, base_packages())
872  attr(res, "base") <- base
873  attr(res, "unknown") <- setdiff(packages, c(res$package, base))
874
875  res
876}
877
878extract_revdeps <- function(pkgs, packages, dependencies, recursive) {
879
880  realdep <- interpret_dependencies(dependencies)
881  dep <- tolower(realdep$direct)
882
883  new <- packages
884  repeat {
885    new <- setdiff(
886      pkgs$deps$upstream[pkgs$deps$ref %in% new & pkgs$deps$type %in% dep],
887      packages)
888    if (!length(new)) break
889    packages <- c(packages, new)
890    if (!recursive) break
891    dep <- tolower(realdep$indirect)
892  }
893
894  packages <- setdiff(packages, "R")
895  res <- pkgs$pkgs[pkgs$pkgs$package %in% packages, ]
896
897  base <- intersect(packages, base_packages())
898  attr(res, "base") <- base
899  attr(res, "unknown") <- setdiff(packages, c(res$package, base))
900
901  res
902}
903
904cmc__get_repos <- function(repos, bioc, cran_mirror, r_version) {
905  repos[["CRAN"]] <- cran_mirror
906  repos <- unlist(repos)
907  bioc_names <- bioconductor$get_repos()
908  res <- tibble(
909    name = names(repos),
910    url = unname(repos),
911    type = ifelse(
912      names(repos) == "CRAN",
913      "cran",
914      ifelse(names(repos) %in% bioc_names, "bioc", "cranlike")
915    ),
916    r_version = "*",
917    bioc_version = NA_character_
918  )
919
920  if (bioc) {
921    for (rver in r_version) {
922      bioc_version <- as.character(bioconductor$get_bioc_version(rver))
923      bioc_repos <- bioconductor$get_repos(bioc_version)
924
925      bioc_res <- tibble(
926        name = names(bioc_repos),
927        url = unname(bioc_repos),
928        type = "bioc",
929        r_version = rver,
930        bioc_version = bioc_version
931      )
932      res <- rbind(res, bioc_res)
933    }
934  }
935
936  res <- res[!duplicated(res$url), ]
937
938  res
939}
940
941#' Query CRAN(like) package data
942#'
943#' It uses CRAN and BioConductor packages, for the current platform and
944#' R version, from the default repositories.
945#'
946#' `meta_cache_list()` lists all packages.
947#'
948#' `meta_cache_update()` updates all metadata. Note that metadata is
949#' automatically updated if it is older than seven days.
950#'
951#' `meta_cache_deps()` queries packages dependencies.
952#'
953#' `meta_cache_revdeps()` queries reverse package dependencies.
954#'
955#' `meta_cache_summary()` lists data about the cache, including its location
956#' and size.
957#'
958#' `meta_cache_cleanup()` deletes the cache files from the disk.
959#'
960#' @param packages Packages to query.
961#' @param dependencies Dependency types to query. See the `dependencies`
962#'   parameter of [utils::install.packages()].
963#' @param recursive Whether to query recursive dependencies.
964#' @param force Whether to force cleanup without asking the user.
965#' @return A data frame (tibble) of the dependencies. For
966#'   `meta_cache_deps()` and `meta_cache_revdeps()` it includes the
967#'   queried `packages` as well.
968#'
969#' @export
970#' @examplesIf pkgcache:::run_examples()
971#' meta_cache_list("pkgdown")
972#' meta_cache_deps("pkgdown", recursive = FALSE)
973#' meta_cache_revdeps("pkgdown", recursive = FALSE)
974
975meta_cache_deps <- function(packages, dependencies = NA,
976                            recursive = TRUE) {
977  get_cranlike_metadata_cache()$deps(packages, dependencies, recursive)
978}
979
980#' @export
981#' @rdname meta_cache_deps
982
983meta_cache_revdeps <- function(packages, dependencies = NA,
984                               recursive = TRUE) {
985  get_cranlike_metadata_cache()$revdeps(packages, dependencies, recursive)
986}
987
988#' @export
989#' @rdname meta_cache_deps
990
991meta_cache_update <- function() {
992  invisible(get_cranlike_metadata_cache()$update()$pkgs)
993}
994
995#' @export
996#' @rdname meta_cache_deps
997
998meta_cache_list <- function(packages = NULL) {
999  get_cranlike_metadata_cache()$list(packages)
1000}
1001
1002#' @export
1003#' @rdname meta_cache_deps
1004
1005meta_cache_cleanup <- function(force = FALSE) {
1006  get_cranlike_metadata_cache()$cleanup(force = force)
1007}
1008
1009#' @export
1010#' @rdname meta_cache_deps
1011
1012meta_cache_summary <- function() {
1013  get_cranlike_metadata_cache()$summary()
1014}
1015