1#  File src/library/tools/R/packages.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2020 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19write_PACKAGES <-
20function(dir = ".", fields = NULL,
21         type = c("source", "mac.binary", "win.binary"),
22         verbose = FALSE, unpacked = FALSE, subdirs = FALSE,
23         latestOnly = TRUE, addFiles = FALSE, rds_compress = "xz",
24         validate = FALSE)
25{
26    if(missing(type) && .Platform$OS.type == "windows")
27        type <- "win.binary"
28    type <- match.arg(type)
29
30    paths <- ""
31    if(is.logical(subdirs) && subdirs) {
32        owd <- setwd(dir)
33        paths <- list.dirs(".")
34        setwd(owd)
35        paths <- c("", paths[paths != "."])
36        ## now strip leading ./
37        paths <- sub("^[.]/", "", paths)
38    } else if(is.character(subdirs)) paths <- c("", subdirs)
39
40    ## Older versions created only plain text and gzipped DCF files with
41    ## the (non-missing and non-empty) package db entries, and hence did
42    ## so one path at a time.  We now also serialize the db directly,
43    ## and hence first build the whole db, and then create the files in
44    ## case some packages were found.
45
46    db <- NULL
47    addPaths <- !identical(paths, "")
48
49    for(path in paths) {
50        this <- if(nzchar(path)) file.path(dir, path) else dir
51        desc <- .build_repository_package_db(this, fields, type, verbose,
52                                             unpacked, validate)
53        desc <- .process_repository_package_db_to_matrix(desc,
54                                                         path,
55                                                         addFiles,
56                                                         addPaths,
57                                                         latestOnly)
58        if(NROW(desc))
59            db <- rbind(db, desc)
60
61    }
62
63    np <- .write_repository_package_db(db, dir, rds_compress)
64
65    invisible(np)
66}
67
68.write_repository_package_db <-
69function(db, dir, rds_compress)
70{
71   np <- NROW(db)
72   if(np > 0L) {
73       ## To save space, empty entries are not written to the DCF, so
74       ## that read.dcf() on these will have the entries as missing.
75       ## Hence, change empty to missing in the db.
76       db[!is.na(db) & (db == "")] <- NA_character_
77       con <- file(file.path(dir, "PACKAGES"), "wt")
78       write.dcf(db, con)
79       close(con)
80       con <- gzfile(file.path(dir, "PACKAGES.gz"), "wt")
81       write.dcf(db, con)
82       close(con)
83       rownames(db) <- db[, "Package"]
84       saveRDS(db, file.path(dir, "PACKAGES.rds"), compress = rds_compress)
85   }
86
87   invisible(np)
88}
89
90.process_repository_package_db_to_matrix <-
91function(desc, path, addFiles, addPaths, latestOnly)
92{
93    desc <- Filter(length, desc)
94
95    if(length(desc)) {
96        Files <- names(desc)
97        fields <- names(desc[[1L]])
98        desc <- matrix(unlist(desc), ncol = length(fields), byrow = TRUE)
99        colnames(desc) <- fields
100        if(addFiles) desc <- cbind(desc, File = Files)
101        if(addPaths) desc <- cbind(desc, Path = path)
102        if(latestOnly) desc <- .remove_stale_dups(desc)
103
104        ## Standardize licenses or replace by NA.
105        license_info <- analyze_licenses(desc[, "License"])
106            desc[, "License"] <-
107                ifelse(license_info$is_standardizable,
108                       license_info$standardization,
109                       NA)
110        }
111    desc
112}
113
114## factored out so it can be used in multiple
115## places without threat of divergence
116.get_pkg_file_pattern = function(type = c("source", "mac.binary", "win.binary"),
117                                 ext.only = FALSE)
118{
119
120    type <- match.arg(type)
121    ## FIXME: might the source pattern be more general?
122    ## was .tar.gz prior to 2.10.0
123
124    ret = switch(type,
125                 "source" = "_.*\\.tar\\.[^_]*$",
126                 "mac.binary" = "_.*\\.tgz$",
127                 "win.binary" = "_.*\\.zip$")
128    if(ext.only)
129        ret = gsub("_.*", "", fixed = TRUE, ret)
130    ret
131}
132## this is OK provided all the 'fields' are ASCII -- so be careful
133## what you add.
134.build_repository_package_db <-
135function(dir, fields = NULL,
136         type = c("source", "mac.binary", "win.binary"),
137         verbose = getOption("verbose"),
138         unpacked = FALSE, validate = FALSE)
139{
140    if(unpacked)
141        return(.build_repository_package_db_from_source_dirs(dir,
142                                                             fields,
143                                                             verbose,
144                                                             validate))
145
146       package_pattern <- .get_pkg_file_pattern(type)
147    files <- list.files(dir, pattern = package_pattern, full.names = TRUE)
148
149    if(!length(files))
150        return(list())
151    db <- .process_package_files_for_repository_db(files,
152                                                   type,
153                                                   fields,
154                                                   verbose,
155                                                   validate)
156    db
157}
158
159.process_package_files_for_repository_db <-
160function(files, type, fields, verbose, validate = FALSE)
161{
162
163    files <- normalizePath(files, mustWork=TRUE) # files comes from list.files, mustWork ok
164    ## Add the standard set of fields required to build a repository's
165    ## PACKAGES file:
166    fields <- unique(c(.get_standard_repository_db_fields(type), fields))
167    ## files was without path at this point in original code,
168    ## use filetbs instead to compute pkg names and set db names
169    filetbs <- basename(files)
170    packages <- sapply(strsplit(filetbs, "_", fixed = TRUE), "[", 1L)
171    db <- vector(length(files), mode = "list")
172    names(db) <- filetbs #files was not full paths before
173    ## Many (roughly length(files)) warnings are *expected*, hence
174    ## suppressed.
175    op <- options(warn = -1)
176    on.exit(options(op))
177    if(verbose) message("Processing packages:")
178    if(type == "win.binary") {
179        for(i in seq_along(files)) {
180            if(verbose) message(paste0("  ", files[i]))
181            con <- unz(files[i], file.path(packages[i], "DESCRIPTION"))
182            temp <- tryCatch(read.dcf(con, fields = fields)[1L, ],
183                             error = identity)
184            if(inherits(temp, "error")) {
185                close(con)
186                next
187            }
188            db[[i]] <- temp
189            close(con)
190        }
191    } else {
192        cwd <- getwd()
193        if (is.null(cwd))
194            stop("current working directory cannot be ascertained")
195        td <- tempfile("PACKAGES")
196        if(!dir.create(td)) stop("unable to create ", td)
197        on.exit(unlink(td, recursive = TRUE), add = TRUE)
198        setwd(td)
199        for(i in seq_along(files)) {
200            if(verbose) message(paste0("  ", files[i]))
201            p <- file.path(packages[i], "DESCRIPTION")
202            ## temp <- try(system(paste("tar zxf", files[i], p)))
203            temp <- try(utils::untar(files[i], files = p))
204            if(!inherits(temp, "try-error")) {
205                temp <- tryCatch(read.dcf(p, fields = fields)[1L, ],
206                                 error = identity)
207                if(!inherits(temp, "error")) {
208                    if(validate) {
209                        ## .check_package_description() by default goes via
210                        ## .read_description() which re-encodes and insists on a
211                        ## single entry unlike the above read.dcf() call.
212                        ok <- .check_package_description(db = temp[!is.na(temp)])
213                        ## FIXME: no format.check_package_description yet.
214                        if(any(as.integer(lengths(ok)) > 0L)) {
215                            message(paste(gettextf("Invalid DESCRIPTION file for package %s",
216                                                   sQuote(basename(dirname(p)))),
217                                          paste(format(ok), collapse = "\n\n"),
218                                          sep = "\n\n"),
219                                    domain = NA)
220                            next
221                        }
222                    }
223                    if("NeedsCompilation" %in% fields &&
224                       is.na(temp["NeedsCompilation"])) {
225                        l <- utils::untar(files[i], list = TRUE)
226                        temp["NeedsCompilation"] <-
227                            if(any(l == file.path(packages[i], "src/"))) "yes" else "no"
228                    }
229                    temp["MD5sum"] <- md5sum(files[i])
230                    db[[i]] <- temp
231                } else {
232                    message(gettextf("reading DESCRIPTION for package %s failed with message:\n  %s",
233                                     sQuote(basename(dirname(p))),
234                                     conditionMessage(temp)),
235                            domain = NA)
236                }
237            }
238            unlink(packages[i], recursive = TRUE)
239        }
240        setwd(cwd)
241    }
242    if(verbose) message("done")
243
244    db
245}
246
247.build_repository_package_db_from_source_dirs <-
248function(dir, fields = NULL, verbose = getOption("verbose"),
249         validate = FALSE)
250{
251    dir <- file_path_as_absolute(dir)
252    fields <- unique(c(.get_standard_repository_db_fields(), fields))
253    paths <- list.files(dir, full.names = TRUE)
254    paths <- paths[dir.exists(paths) &
255                   file_test("-f", file.path(paths, "DESCRIPTION"))]
256    db <- vector(length(paths), mode = "list")
257    if(verbose) message("Processing packages:")
258    for(i in seq_along(paths)) {
259        if(verbose) message(paste0("  ", basename(paths[i])))
260        temp <- tryCatch(read.dcf(file.path(paths[i], "DESCRIPTION"),
261                                  fields = fields)[1L, ],
262                         error = identity)
263        if(!inherits(temp, "error")) {
264            if(validate) {
265                ## .check_package_description() by default goes via
266                ## .read_description() which re-encodes and insists on a
267                ## single entry unlike the above read.dcf() call.
268                ok <- .check_package_description(db = temp[!is.na(temp)])
269                ## FIXME: no format.check_package_description yet.
270                if(any(as.integer(lengths(ok)) > 0L)) {
271                    warning(paste(gettextf("Invalid DESCRIPTION file for package %s",
272                                           sQuote(basename(paths[i]))),
273                                  paste(format(ok), collapse = "\n\n"),
274                                  sep = "\n\n"),
275                            domain = NA,
276                            call. = FALSE)
277                    next
278                }
279            }
280            if(is.na(temp["NeedsCompilation"])) {
281                temp["NeedsCompilation"] <-
282                    if(dir.exists(file.path(paths[i], "src"))) "yes" else "no"
283            }
284            ## Cannot compute MD5 sum of the source tar.gz when working
285            ## on the unpacked sources ...
286            db[[i]] <- temp
287        } else {
288            warning(gettextf("reading DESCRIPTION for package %s failed with message:\n  %s",
289                             sQuote(basename(paths[i])),
290                             conditionMessage(temp)),
291                    domain = NA)
292        }
293    }
294    if(verbose) message("done")
295    names(db) <- basename(paths)
296    db
297}
298
299dependsOnPkgs <-
300function(pkgs, dependencies = "strong",
301         recursive = TRUE, lib.loc = NULL,
302         installed = utils::installed.packages(lib.loc, fields = "Enhances"))
303{
304    dependencies <- .expand_dependency_type_spec(dependencies)
305
306    av <- installed[, dependencies, drop = FALSE]
307    rn <- as.character(installed[, "Package"])
308    need <- apply(av, 1L, function(x)
309                  any(pkgs %in% utils:::.clean_up_dependencies(x)) )
310    uses <- rn[need]
311    if(recursive) {
312        p <- pkgs
313        repeat {
314            p <- unique(c(p, uses))
315            need <- apply(av, 1L, function(x)
316                          any(p %in% utils:::.clean_up_dependencies(x)) )
317            uses <- unique(c(p, rn[need]))
318            if(length(uses) <= length(p)) break
319        }
320    }
321    setdiff(uses, pkgs)
322}
323
324.remove_stale_dups <-
325function(ap)
326{
327    ## Given a matrix from available.packages, return a copy
328    ## with no duplicate packages, being sure to keep the packages
329    ## with highest version number.
330    ## (Also works for data frame package repository dbs.)
331    pkgs <- ap[ , "Package"]
332    dup_pkgs <- pkgs[duplicated(pkgs)]
333    stale_dups <- integer(length(dup_pkgs))
334    i <- 1L
335    for (dp in dup_pkgs) {
336        wh <- which(dp == pkgs)
337        vers <- package_version(ap[wh, "Version"])
338        keep_ver <- max(vers)
339	keep_idx <- which.max(vers == keep_ver) # they might all be max
340        wh <- wh[-keep_idx]
341        end_i <- i + length(wh) - 1L
342        stale_dups[i:end_i] <- wh
343        i <- end_i + 1L
344    }
345    ## Possible to have only one package in a repository
346    if(length(stale_dups)) ap[-stale_dups, , drop = FALSE] else ap
347}
348
349package_dependencies <-
350function(packages = NULL, db = NULL, which = "strong",
351         recursive = FALSE, reverse = FALSE,
352         verbose = getOption("verbose"))
353{
354    ## <FIXME>
355    ## What about duplicated entries?
356    ## </FIXME>
357
358    if(is.null(db)) db <- utils::available.packages()
359
360    fields <- which <- .expand_dependency_type_spec(which)
361    if(is.character(recursive)) {
362        recursive <- .expand_dependency_type_spec(recursive)
363        if(identical(which, recursive))
364            recursive <- TRUE
365        else
366            fields <- unique(c(fields, recursive))
367    }
368
369    ## For given packages which are not found in the db, return "list
370    ## NAs" (i.e., NULL entries), as opposed to character() entries
371    ## which indicate no dependencies.
372    out_of_db_packages <- character()
373
374    ## For forward non-recursive depends, we can simplify matters by
375    ## subscripting the db right away---modulo boundary cases.
376    if(!is.character(recursive) && !recursive && !reverse) {
377        if(!is.null(packages)) {
378            ind <- match(packages, db[, "Package"], nomatch = 0L)
379            db <- db[ind, , drop = FALSE]
380            out_of_db_packages <- packages[ind == 0L]
381        }
382    }
383
384    db <- as.data.frame(db[, c("Package", fields), drop = FALSE])
385    ## Avoid recomputing package dependency names in recursive
386    ## invocations.
387    for(f in fields) {
388        if(!is.list(d <- db[[f]]))
389            db[[f]] <- lapply(d, .extract_dependency_package_names)
390    }
391
392    if(is.character(recursive)) {
393        ## Direct dependencies:
394        d_d <- Recall(packages, db, which, FALSE,
395                      reverse, verbose)
396        ## Recursive dependencies of all these:
397        d_r <- Recall(unique(unlist(d_d)), db, recursive, TRUE,
398                      reverse, verbose)
399        ## Now glue together:
400        return(lapply(d_d,
401                      function(p) {
402                          sort(unique(c(p, unlist(d_r[p],
403                                                  use.names = FALSE))))
404                      }))
405    }
406
407    depends <-
408        do.call(Map,
409                c(list("c"),
410                  db[which],
411                  list(USE.NAMES = FALSE)))
412
413    depends <- lapply(depends, unique)
414
415    if(!recursive && !reverse) {
416        names(depends) <- db$Package
417        if(length(out_of_db_packages)) {
418            depends <-
419                c(depends,
420                  structure(vector("list", length(out_of_db_packages)),
421                            names = out_of_db_packages))
422        }
423        return(depends)
424    }
425
426    all_packages <- sort(unique(c(db$Package, unlist(depends))))
427
428    if(!recursive) {
429        ## Need to invert.
430        depends <-
431            split(rep.int(db$Package, lengths(depends)),
432                  factor(unlist(depends), levels = all_packages))
433        if(!is.null(packages)) {
434            depends <- depends[match(packages, names(depends))]
435            names(depends) <- packages
436        }
437        return(depends)
438    }
439
440    ## Recursive dependencies.
441    ## We need to compute the transitive closure of the dependency
442    ## relation, but e.g. Warshall's algorithm (O(n^3)) is
443    ## computationally infeasible.
444    ## Hence, in principle, we do the following.
445    ## Take the current list of pairs (i,j) in the relation.
446    ## Iterate over all j and whenever i R j and j R k add (i,k).
447    ## Repeat this until no new pairs get added.
448    ## To do this in R, we use a 2-column matrix of (i,j) rows.
449    ## We then create two lists which for all j contain the i and k
450    ## with i R j and j R k, respectively, and combine these.
451    ## This works reasonably well, but of course more efficient
452    ## implementations should be possible.
453    matchP <- match(rep.int(db$Package, lengths(depends)),
454		    all_packages)
455    matchD <- match(unlist(depends), all_packages)
456    tab <- if(reverse)
457	split(matchP,
458	      factor(matchD, levels = seq_along(all_packages)))
459    else
460	split(matchD,
461	      factor(matchP, levels = seq_along(all_packages)))
462    if(is.null(packages)) {
463        if(reverse) {
464            packages <- all_packages
465            p_L <- seq_along(all_packages)
466        } else {
467            packages <- db$Package
468            p_L <- match(packages, all_packages)
469        }
470    } else {
471        p_L <- match(packages, all_packages, nomatch = 0L)
472        if(any(ind <- (p_L == 0L))) {
473            out_of_db_packages <- packages[ind]
474            packages <- packages[!ind]
475            p_L <- p_L[!ind]
476        }
477    }
478    p_R <- tab[p_L]
479    pos <- cbind(rep.int(p_L, lengths(p_R)), unlist(p_R))
480    ctr <- 0L
481    repeat {
482        if(verbose) cat("Cycle:", (ctr <- ctr + 1L))
483        p_L <- split(pos[, 1L], pos[, 2L])
484        new <- do.call(rbind,
485                       Map(function(i, k)
486                           cbind(rep.int(i, length(k)),
487                                 rep(k, each = length(i))),
488                           p_L, tab[as.integer(names(p_L))]))
489        npos <- unique(rbind(pos, new))
490        nnew <- nrow(npos) - nrow(pos)
491        if(verbose) cat(" NNew:", nnew, "\n")
492        if(!nnew) break
493        pos <- npos
494    }
495    depends <-
496        split(all_packages[pos[, 2L]],
497              factor(all_packages[pos[, 1L]],
498                     levels = unique(packages)))
499    if(length(out_of_db_packages)) {
500        depends <-
501            c(depends,
502              structure(vector("list", length(out_of_db_packages)),
503                        names = out_of_db_packages))
504    }
505    depends
506}
507
508.expand_dependency_type_spec <-
509function(x)
510{
511    if(identical(x, "strong"))
512        c("Depends", "Imports", "LinkingTo")
513    else if(identical(x, "most"))
514        c("Depends", "Imports", "LinkingTo", "Suggests")
515    else if(identical(x, "all"))
516        c("Depends", "Imports", "LinkingTo", "Suggests", "Enhances")
517    else
518        x
519    ## (Could also intersect x with the possible types.)
520}
521
522## .extract_dependency_package_names <-
523## function(x)
524## {
525##     ## Assume a character *string*.
526##     if(is.na(x)) return(character())
527##     x <- strsplit(x, ",", fixed = TRUE)[[1L]]
528##     ## FIXME: The following is much faster on Linux but apparently not
529##     ## on Windows:
530##     ## x <- sub("(?s)[[:space:]]*([[:alnum:].]+).*", "\\1", x, perl = TRUE)
531##     x <- sub("[[:space:]]*([[:alnum:].]+).*", "\\1", x)
532##     x[nzchar(x) & (x != "R")]
533## }
534
535.extract_dependency_package_names <-
536function(x)
537    .Call(C_package_dependencies_scan, x)
538