1# File src/library/tools/R/packages.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2020 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19write_PACKAGES <- 20function(dir = ".", fields = NULL, 21 type = c("source", "mac.binary", "win.binary"), 22 verbose = FALSE, unpacked = FALSE, subdirs = FALSE, 23 latestOnly = TRUE, addFiles = FALSE, rds_compress = "xz", 24 validate = FALSE) 25{ 26 if(missing(type) && .Platform$OS.type == "windows") 27 type <- "win.binary" 28 type <- match.arg(type) 29 30 paths <- "" 31 if(is.logical(subdirs) && subdirs) { 32 owd <- setwd(dir) 33 paths <- list.dirs(".") 34 setwd(owd) 35 paths <- c("", paths[paths != "."]) 36 ## now strip leading ./ 37 paths <- sub("^[.]/", "", paths) 38 } else if(is.character(subdirs)) paths <- c("", subdirs) 39 40 ## Older versions created only plain text and gzipped DCF files with 41 ## the (non-missing and non-empty) package db entries, and hence did 42 ## so one path at a time. We now also serialize the db directly, 43 ## and hence first build the whole db, and then create the files in 44 ## case some packages were found. 45 46 db <- NULL 47 addPaths <- !identical(paths, "") 48 49 for(path in paths) { 50 this <- if(nzchar(path)) file.path(dir, path) else dir 51 desc <- .build_repository_package_db(this, fields, type, verbose, 52 unpacked, validate) 53 desc <- .process_repository_package_db_to_matrix(desc, 54 path, 55 addFiles, 56 addPaths, 57 latestOnly) 58 if(NROW(desc)) 59 db <- rbind(db, desc) 60 61 } 62 63 np <- .write_repository_package_db(db, dir, rds_compress) 64 65 invisible(np) 66} 67 68.write_repository_package_db <- 69function(db, dir, rds_compress) 70{ 71 np <- NROW(db) 72 if(np > 0L) { 73 ## To save space, empty entries are not written to the DCF, so 74 ## that read.dcf() on these will have the entries as missing. 75 ## Hence, change empty to missing in the db. 76 db[!is.na(db) & (db == "")] <- NA_character_ 77 con <- file(file.path(dir, "PACKAGES"), "wt") 78 write.dcf(db, con) 79 close(con) 80 con <- gzfile(file.path(dir, "PACKAGES.gz"), "wt") 81 write.dcf(db, con) 82 close(con) 83 rownames(db) <- db[, "Package"] 84 saveRDS(db, file.path(dir, "PACKAGES.rds"), compress = rds_compress) 85 } 86 87 invisible(np) 88} 89 90.process_repository_package_db_to_matrix <- 91function(desc, path, addFiles, addPaths, latestOnly) 92{ 93 desc <- Filter(length, desc) 94 95 if(length(desc)) { 96 Files <- names(desc) 97 fields <- names(desc[[1L]]) 98 desc <- matrix(unlist(desc), ncol = length(fields), byrow = TRUE) 99 colnames(desc) <- fields 100 if(addFiles) desc <- cbind(desc, File = Files) 101 if(addPaths) desc <- cbind(desc, Path = path) 102 if(latestOnly) desc <- .remove_stale_dups(desc) 103 104 ## Standardize licenses or replace by NA. 105 license_info <- analyze_licenses(desc[, "License"]) 106 desc[, "License"] <- 107 ifelse(license_info$is_standardizable, 108 license_info$standardization, 109 NA) 110 } 111 desc 112} 113 114## factored out so it can be used in multiple 115## places without threat of divergence 116.get_pkg_file_pattern = function(type = c("source", "mac.binary", "win.binary"), 117 ext.only = FALSE) 118{ 119 120 type <- match.arg(type) 121 ## FIXME: might the source pattern be more general? 122 ## was .tar.gz prior to 2.10.0 123 124 ret = switch(type, 125 "source" = "_.*\\.tar\\.[^_]*$", 126 "mac.binary" = "_.*\\.tgz$", 127 "win.binary" = "_.*\\.zip$") 128 if(ext.only) 129 ret = gsub("_.*", "", fixed = TRUE, ret) 130 ret 131} 132## this is OK provided all the 'fields' are ASCII -- so be careful 133## what you add. 134.build_repository_package_db <- 135function(dir, fields = NULL, 136 type = c("source", "mac.binary", "win.binary"), 137 verbose = getOption("verbose"), 138 unpacked = FALSE, validate = FALSE) 139{ 140 if(unpacked) 141 return(.build_repository_package_db_from_source_dirs(dir, 142 fields, 143 verbose, 144 validate)) 145 146 package_pattern <- .get_pkg_file_pattern(type) 147 files <- list.files(dir, pattern = package_pattern, full.names = TRUE) 148 149 if(!length(files)) 150 return(list()) 151 db <- .process_package_files_for_repository_db(files, 152 type, 153 fields, 154 verbose, 155 validate) 156 db 157} 158 159.process_package_files_for_repository_db <- 160function(files, type, fields, verbose, validate = FALSE) 161{ 162 163 files <- normalizePath(files, mustWork=TRUE) # files comes from list.files, mustWork ok 164 ## Add the standard set of fields required to build a repository's 165 ## PACKAGES file: 166 fields <- unique(c(.get_standard_repository_db_fields(type), fields)) 167 ## files was without path at this point in original code, 168 ## use filetbs instead to compute pkg names and set db names 169 filetbs <- basename(files) 170 packages <- sapply(strsplit(filetbs, "_", fixed = TRUE), "[", 1L) 171 db <- vector(length(files), mode = "list") 172 names(db) <- filetbs #files was not full paths before 173 ## Many (roughly length(files)) warnings are *expected*, hence 174 ## suppressed. 175 op <- options(warn = -1) 176 on.exit(options(op)) 177 if(verbose) message("Processing packages:") 178 if(type == "win.binary") { 179 for(i in seq_along(files)) { 180 if(verbose) message(paste0(" ", files[i])) 181 con <- unz(files[i], file.path(packages[i], "DESCRIPTION")) 182 temp <- tryCatch(read.dcf(con, fields = fields)[1L, ], 183 error = identity) 184 if(inherits(temp, "error")) { 185 close(con) 186 next 187 } 188 db[[i]] <- temp 189 close(con) 190 } 191 } else { 192 cwd <- getwd() 193 if (is.null(cwd)) 194 stop("current working directory cannot be ascertained") 195 td <- tempfile("PACKAGES") 196 if(!dir.create(td)) stop("unable to create ", td) 197 on.exit(unlink(td, recursive = TRUE), add = TRUE) 198 setwd(td) 199 for(i in seq_along(files)) { 200 if(verbose) message(paste0(" ", files[i])) 201 p <- file.path(packages[i], "DESCRIPTION") 202 ## temp <- try(system(paste("tar zxf", files[i], p))) 203 temp <- try(utils::untar(files[i], files = p)) 204 if(!inherits(temp, "try-error")) { 205 temp <- tryCatch(read.dcf(p, fields = fields)[1L, ], 206 error = identity) 207 if(!inherits(temp, "error")) { 208 if(validate) { 209 ## .check_package_description() by default goes via 210 ## .read_description() which re-encodes and insists on a 211 ## single entry unlike the above read.dcf() call. 212 ok <- .check_package_description(db = temp[!is.na(temp)]) 213 ## FIXME: no format.check_package_description yet. 214 if(any(as.integer(lengths(ok)) > 0L)) { 215 message(paste(gettextf("Invalid DESCRIPTION file for package %s", 216 sQuote(basename(dirname(p)))), 217 paste(format(ok), collapse = "\n\n"), 218 sep = "\n\n"), 219 domain = NA) 220 next 221 } 222 } 223 if("NeedsCompilation" %in% fields && 224 is.na(temp["NeedsCompilation"])) { 225 l <- utils::untar(files[i], list = TRUE) 226 temp["NeedsCompilation"] <- 227 if(any(l == file.path(packages[i], "src/"))) "yes" else "no" 228 } 229 temp["MD5sum"] <- md5sum(files[i]) 230 db[[i]] <- temp 231 } else { 232 message(gettextf("reading DESCRIPTION for package %s failed with message:\n %s", 233 sQuote(basename(dirname(p))), 234 conditionMessage(temp)), 235 domain = NA) 236 } 237 } 238 unlink(packages[i], recursive = TRUE) 239 } 240 setwd(cwd) 241 } 242 if(verbose) message("done") 243 244 db 245} 246 247.build_repository_package_db_from_source_dirs <- 248function(dir, fields = NULL, verbose = getOption("verbose"), 249 validate = FALSE) 250{ 251 dir <- file_path_as_absolute(dir) 252 fields <- unique(c(.get_standard_repository_db_fields(), fields)) 253 paths <- list.files(dir, full.names = TRUE) 254 paths <- paths[dir.exists(paths) & 255 file_test("-f", file.path(paths, "DESCRIPTION"))] 256 db <- vector(length(paths), mode = "list") 257 if(verbose) message("Processing packages:") 258 for(i in seq_along(paths)) { 259 if(verbose) message(paste0(" ", basename(paths[i]))) 260 temp <- tryCatch(read.dcf(file.path(paths[i], "DESCRIPTION"), 261 fields = fields)[1L, ], 262 error = identity) 263 if(!inherits(temp, "error")) { 264 if(validate) { 265 ## .check_package_description() by default goes via 266 ## .read_description() which re-encodes and insists on a 267 ## single entry unlike the above read.dcf() call. 268 ok <- .check_package_description(db = temp[!is.na(temp)]) 269 ## FIXME: no format.check_package_description yet. 270 if(any(as.integer(lengths(ok)) > 0L)) { 271 warning(paste(gettextf("Invalid DESCRIPTION file for package %s", 272 sQuote(basename(paths[i]))), 273 paste(format(ok), collapse = "\n\n"), 274 sep = "\n\n"), 275 domain = NA, 276 call. = FALSE) 277 next 278 } 279 } 280 if(is.na(temp["NeedsCompilation"])) { 281 temp["NeedsCompilation"] <- 282 if(dir.exists(file.path(paths[i], "src"))) "yes" else "no" 283 } 284 ## Cannot compute MD5 sum of the source tar.gz when working 285 ## on the unpacked sources ... 286 db[[i]] <- temp 287 } else { 288 warning(gettextf("reading DESCRIPTION for package %s failed with message:\n %s", 289 sQuote(basename(paths[i])), 290 conditionMessage(temp)), 291 domain = NA) 292 } 293 } 294 if(verbose) message("done") 295 names(db) <- basename(paths) 296 db 297} 298 299dependsOnPkgs <- 300function(pkgs, dependencies = "strong", 301 recursive = TRUE, lib.loc = NULL, 302 installed = utils::installed.packages(lib.loc, fields = "Enhances")) 303{ 304 dependencies <- .expand_dependency_type_spec(dependencies) 305 306 av <- installed[, dependencies, drop = FALSE] 307 rn <- as.character(installed[, "Package"]) 308 need <- apply(av, 1L, function(x) 309 any(pkgs %in% utils:::.clean_up_dependencies(x)) ) 310 uses <- rn[need] 311 if(recursive) { 312 p <- pkgs 313 repeat { 314 p <- unique(c(p, uses)) 315 need <- apply(av, 1L, function(x) 316 any(p %in% utils:::.clean_up_dependencies(x)) ) 317 uses <- unique(c(p, rn[need])) 318 if(length(uses) <= length(p)) break 319 } 320 } 321 setdiff(uses, pkgs) 322} 323 324.remove_stale_dups <- 325function(ap) 326{ 327 ## Given a matrix from available.packages, return a copy 328 ## with no duplicate packages, being sure to keep the packages 329 ## with highest version number. 330 ## (Also works for data frame package repository dbs.) 331 pkgs <- ap[ , "Package"] 332 dup_pkgs <- pkgs[duplicated(pkgs)] 333 stale_dups <- integer(length(dup_pkgs)) 334 i <- 1L 335 for (dp in dup_pkgs) { 336 wh <- which(dp == pkgs) 337 vers <- package_version(ap[wh, "Version"]) 338 keep_ver <- max(vers) 339 keep_idx <- which.max(vers == keep_ver) # they might all be max 340 wh <- wh[-keep_idx] 341 end_i <- i + length(wh) - 1L 342 stale_dups[i:end_i] <- wh 343 i <- end_i + 1L 344 } 345 ## Possible to have only one package in a repository 346 if(length(stale_dups)) ap[-stale_dups, , drop = FALSE] else ap 347} 348 349package_dependencies <- 350function(packages = NULL, db = NULL, which = "strong", 351 recursive = FALSE, reverse = FALSE, 352 verbose = getOption("verbose")) 353{ 354 ## <FIXME> 355 ## What about duplicated entries? 356 ## </FIXME> 357 358 if(is.null(db)) db <- utils::available.packages() 359 360 fields <- which <- .expand_dependency_type_spec(which) 361 if(is.character(recursive)) { 362 recursive <- .expand_dependency_type_spec(recursive) 363 if(identical(which, recursive)) 364 recursive <- TRUE 365 else 366 fields <- unique(c(fields, recursive)) 367 } 368 369 ## For given packages which are not found in the db, return "list 370 ## NAs" (i.e., NULL entries), as opposed to character() entries 371 ## which indicate no dependencies. 372 out_of_db_packages <- character() 373 374 ## For forward non-recursive depends, we can simplify matters by 375 ## subscripting the db right away---modulo boundary cases. 376 if(!is.character(recursive) && !recursive && !reverse) { 377 if(!is.null(packages)) { 378 ind <- match(packages, db[, "Package"], nomatch = 0L) 379 db <- db[ind, , drop = FALSE] 380 out_of_db_packages <- packages[ind == 0L] 381 } 382 } 383 384 db <- as.data.frame(db[, c("Package", fields), drop = FALSE]) 385 ## Avoid recomputing package dependency names in recursive 386 ## invocations. 387 for(f in fields) { 388 if(!is.list(d <- db[[f]])) 389 db[[f]] <- lapply(d, .extract_dependency_package_names) 390 } 391 392 if(is.character(recursive)) { 393 ## Direct dependencies: 394 d_d <- Recall(packages, db, which, FALSE, 395 reverse, verbose) 396 ## Recursive dependencies of all these: 397 d_r <- Recall(unique(unlist(d_d)), db, recursive, TRUE, 398 reverse, verbose) 399 ## Now glue together: 400 return(lapply(d_d, 401 function(p) { 402 sort(unique(c(p, unlist(d_r[p], 403 use.names = FALSE)))) 404 })) 405 } 406 407 depends <- 408 do.call(Map, 409 c(list("c"), 410 db[which], 411 list(USE.NAMES = FALSE))) 412 413 depends <- lapply(depends, unique) 414 415 if(!recursive && !reverse) { 416 names(depends) <- db$Package 417 if(length(out_of_db_packages)) { 418 depends <- 419 c(depends, 420 structure(vector("list", length(out_of_db_packages)), 421 names = out_of_db_packages)) 422 } 423 return(depends) 424 } 425 426 all_packages <- sort(unique(c(db$Package, unlist(depends)))) 427 428 if(!recursive) { 429 ## Need to invert. 430 depends <- 431 split(rep.int(db$Package, lengths(depends)), 432 factor(unlist(depends), levels = all_packages)) 433 if(!is.null(packages)) { 434 depends <- depends[match(packages, names(depends))] 435 names(depends) <- packages 436 } 437 return(depends) 438 } 439 440 ## Recursive dependencies. 441 ## We need to compute the transitive closure of the dependency 442 ## relation, but e.g. Warshall's algorithm (O(n^3)) is 443 ## computationally infeasible. 444 ## Hence, in principle, we do the following. 445 ## Take the current list of pairs (i,j) in the relation. 446 ## Iterate over all j and whenever i R j and j R k add (i,k). 447 ## Repeat this until no new pairs get added. 448 ## To do this in R, we use a 2-column matrix of (i,j) rows. 449 ## We then create two lists which for all j contain the i and k 450 ## with i R j and j R k, respectively, and combine these. 451 ## This works reasonably well, but of course more efficient 452 ## implementations should be possible. 453 matchP <- match(rep.int(db$Package, lengths(depends)), 454 all_packages) 455 matchD <- match(unlist(depends), all_packages) 456 tab <- if(reverse) 457 split(matchP, 458 factor(matchD, levels = seq_along(all_packages))) 459 else 460 split(matchD, 461 factor(matchP, levels = seq_along(all_packages))) 462 if(is.null(packages)) { 463 if(reverse) { 464 packages <- all_packages 465 p_L <- seq_along(all_packages) 466 } else { 467 packages <- db$Package 468 p_L <- match(packages, all_packages) 469 } 470 } else { 471 p_L <- match(packages, all_packages, nomatch = 0L) 472 if(any(ind <- (p_L == 0L))) { 473 out_of_db_packages <- packages[ind] 474 packages <- packages[!ind] 475 p_L <- p_L[!ind] 476 } 477 } 478 p_R <- tab[p_L] 479 pos <- cbind(rep.int(p_L, lengths(p_R)), unlist(p_R)) 480 ctr <- 0L 481 repeat { 482 if(verbose) cat("Cycle:", (ctr <- ctr + 1L)) 483 p_L <- split(pos[, 1L], pos[, 2L]) 484 new <- do.call(rbind, 485 Map(function(i, k) 486 cbind(rep.int(i, length(k)), 487 rep(k, each = length(i))), 488 p_L, tab[as.integer(names(p_L))])) 489 npos <- unique(rbind(pos, new)) 490 nnew <- nrow(npos) - nrow(pos) 491 if(verbose) cat(" NNew:", nnew, "\n") 492 if(!nnew) break 493 pos <- npos 494 } 495 depends <- 496 split(all_packages[pos[, 2L]], 497 factor(all_packages[pos[, 1L]], 498 levels = unique(packages))) 499 if(length(out_of_db_packages)) { 500 depends <- 501 c(depends, 502 structure(vector("list", length(out_of_db_packages)), 503 names = out_of_db_packages)) 504 } 505 depends 506} 507 508.expand_dependency_type_spec <- 509function(x) 510{ 511 if(identical(x, "strong")) 512 c("Depends", "Imports", "LinkingTo") 513 else if(identical(x, "most")) 514 c("Depends", "Imports", "LinkingTo", "Suggests") 515 else if(identical(x, "all")) 516 c("Depends", "Imports", "LinkingTo", "Suggests", "Enhances") 517 else 518 x 519 ## (Could also intersect x with the possible types.) 520} 521 522## .extract_dependency_package_names <- 523## function(x) 524## { 525## ## Assume a character *string*. 526## if(is.na(x)) return(character()) 527## x <- strsplit(x, ",", fixed = TRUE)[[1L]] 528## ## FIXME: The following is much faster on Linux but apparently not 529## ## on Windows: 530## ## x <- sub("(?s)[[:space:]]*([[:alnum:].]+).*", "\\1", x, perl = TRUE) 531## x <- sub("[[:space:]]*([[:alnum:].]+).*", "\\1", x) 532## x[nzchar(x) & (x != "R")] 533## } 534 535.extract_dependency_package_names <- 536function(x) 537 .Call(C_package_dependencies_scan, x) 538