1#  File src/library/utils/R/help.search.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2018 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19.hsearch_db <-
20local({
21    hdb <- NULL
22    function(new) {
23	if(!missing(new))
24	    hdb <<- new
25	else
26	    hdb
27    }
28})
29
30merge_vignette_index <-
31function(hDB, path, pkg)
32{
33    ## Vignettes in the hsearch index started in R 2.14.0
34    ## Most packages don't have them, so the following should not be
35    ## too inefficient
36    if(file.exists(v_file <- file.path(path, "Meta", "vignette.rds"))
37       && !is.null(vDB <- readRDS(v_file))
38       && nrow(vDB)) {
39	## Make it look like an hDB base matrix and append it
40	base <- matrix("", nrow = nrow(vDB), ncol = 8L)
41	colnames(base) <- colnames(hDB[[1L]])
42	base[, "Package"] <- pkg
43	base[, "LibPath"] <- path
44	id <- as.character(1:nrow(vDB) + NROW(hDB[[1L]]))
45	base[, "ID"] <- id
46	base[, "Name"] <- tools::file_path_sans_ext(basename(vDB$PDF))
47        ## As spotted by Henrik Bengtsson <henrik.bengtsson@gmail.com>,
48        ## using tools::file_path_sans_ext(basename(vDB$File) does not
49        ## work as intended, as non-Sweave vignettes could have nested
50        ## extensions (e.g., 'foo.tex.rsp' or 'foo.pdf.asis').
51        ## The docs say that the 'name' is the "base of the vignette
52        ## filename", which can be interpreted as above for the case of
53        ## nested extensions (and in fact, tools:::httpd() does so).
54	base[, "Topic"] <- base[, "Name"]
55	base[, "Title"] <- vDB$Title
56	base[, "Type"] <- "vignette"
57	hDB[[1L]] <- rbind(hDB[[1L]], base)
58	aliases <- matrix("", nrow = nrow(vDB), ncol = 3L)
59	colnames(aliases) <- colnames(hDB[[2L]])
60	aliases[, "Alias"] <- base[, "Name"]
61	aliases[, "ID"] <- id
62	aliases[, "Package"] <- pkg
63	hDB[[2L]] <- rbind(hDB[[2L]], aliases)
64	nkeywords <- sum(lengths(vDB$Keywords))
65	if (nkeywords) {
66	    keywords <- matrix("", nrow = nkeywords, ncol = 3L)
67	    colnames(keywords) <- colnames(hDB[[4L]])
68	    keywords[,"Concept"] <- unlist(vDB$Keywords)
69	    keywords[,"ID"] <- unlist(lapply(1:nrow(vDB),
70		   function(i) rep.int(id[i], length(vDB$Keywords[[i]]))))
71	    keywords[,"Package"] <- pkg
72	    hDB[[4L]] <- rbind(hDB[[4L]], keywords)
73	}
74    }
75    hDB
76}
77
78merge_demo_index <-
79function(hDB, path, pkg)
80{
81    ## Demos in the hsearch index started in R 2.14.0
82    if(file.exists(d_file <- file.path(path, "Meta", "demo.rds"))
83       && !is.null(dDB <- readRDS(d_file))
84       && nrow(dDB)) {
85	## Make it look like an hDB base matrix and append it
86	base <- matrix("", nrow = nrow(dDB), ncol = 8L)
87	colnames(base) <- colnames(hDB[[1]])
88	base[, "Package"] <- pkg
89	base[, "LibPath"] <- path
90	id <- as.character(1:nrow(dDB) + NROW(hDB[[1L]]))
91	base[, "ID"] <- id
92	base[, "Name"] <- dDB[, 1L]
93	base[, "Topic"] <- base[, "Name"]
94	base[, "Title"] <- dDB[, 2L]
95	base[, "Type"] <- "demo"
96	hDB[[1L]] <- rbind(hDB[[1L]], base)
97	aliases <- matrix("", nrow = nrow(dDB), ncol = 3L)
98	colnames(aliases) <- colnames(hDB[[2L]])
99	aliases[, "Alias"] <- base[, "Name"]
100	aliases[, "ID"] <- id
101	aliases[, "Package"] <- pkg
102	hDB[[2L]] <- rbind(hDB[[2L]], aliases)
103    }
104    hDB
105}
106
107hsearch_db_fields <-
108    c("alias", "concept", "keyword", "name", "title")
109hsearch_db_types <-
110    c("help", "vignette", "demo")
111
112## FIXME: use UTF-8, either always or optionally
113## (Needs UTF-8-savvy & fast agrep, and PCRE regexps.)
114help.search <-
115function(pattern, fields = c("alias", "concept", "title"),
116         apropos, keyword, whatis, ignore.case = TRUE,
117         package = NULL, lib.loc = NULL,
118         help.db = getOption("help.db"),
119         verbose = getOption("verbose"),
120         rebuild = FALSE, agrep = NULL, use_UTF8 = FALSE,
121         types = getOption("help.search.types"))
122{
123    ### Argument handling.
124    .wrong_args <- function(args)
125	gettextf("argument %s must be a single character string", sQuote(args))
126    if(is.logical(verbose)) verbose <- 2 * as.integer(verbose)
127    fuzzy <- agrep
128    if(!missing(pattern)) {
129	if(!is.character(pattern) || (length(pattern) > 1L))
130	    stop(.wrong_args("pattern"), domain = NA)
131	i <- pmatch(fields, hsearch_db_fields)
132	if(anyNA(i))
133	    stop("incorrect field specification")
134	else
135	    fields <- hsearch_db_fields[i]
136    } else if(!missing(apropos)) {
137	if(!is.character(apropos) || (length(apropos) > 1L))
138	    stop(.wrong_args("apropos"), domain = NA)
139	else {
140	    pattern <- apropos
141	    fields <- c("alias", "title")
142	}
143    } else if(!missing(keyword)) {
144	if(!is.character(keyword) || (length(keyword) > 1L))
145	    stop(.wrong_args("keyword"), domain = NA)
146	else {
147	    pattern <- keyword
148	    fields <- "keyword"
149	    if(is.null(fuzzy)) fuzzy <- FALSE
150	}
151    } else if(!missing(whatis)) {
152	if(!is.character(whatis) || (length(whatis) > 1))
153	    stop(.wrong_args("whatis"), domain = NA)
154	else {
155	    pattern <- whatis
156	    fields <- "alias"
157	}
158    } else {
159	stop("do not know what to search")
160    }
161
162    if(!missing(help.db))
163	warning("argument 'help.db' is deprecated")
164
165    ## This duplicates expansion in hsearch_db(), but there is no simple
166    ## way to avoid this.
167    i <- pmatch(types, hsearch_db_types)
168    if (anyNA(i))
169	stop("incorrect type specification")
170    else
171	types <- hsearch_db_types[i]
172
173    ### Set up the hsearch db.
174    db <- hsearch_db(package, lib.loc, types, verbose, rebuild,
175                     use_UTF8)
176    ## Argument lib.loc was expanded when building the hsearch db, so
177    ## get from there.
178    lib.loc <- attr(db, "LibPaths")
179
180    ## Subset to the requested help types if necessary.
181    if(!identical(sort(types), sort(attr(db, "Types")))) {
182        db$Base <- db$Base[!is.na(match(db$Base$Type, types)), ]
183        db[-1L] <-
184            lapply(db[-1L],
185                   function(e) {
186                       e[!is.na(match(e$ID, db$Base$ID)), ]
187                   })
188    }
189
190    if(!is.null(package)) {
191	## Argument 'package' was given.  Need to check that all given
192	## packages exist in the db, and only search the given ones.
193	pos_in_hsearch_db <-
194	    match(package, unique(db$Base[, "Package"]), nomatch = 0L)
195        ## This should not happen for R >= 2.4.0
196	if(any(pos_in_hsearch_db) == 0L)
197	    stop(gettextf("no information in the database for package %s: need 'rebuild = TRUE'?",
198			  sQuote(package[pos_in_hsearch_db == 0][1L])),
199                 domain = NA)
200	db[] <-
201	    lapply(db,
202		   function(e) {
203		       e[!is.na(match(e$Package, package)), ]
204		   })
205    }
206
207    ### Matching.
208    if(verbose >= 2L) {
209	message("Database of ",
210                NROW(db$Base), " help objects (",
211                NROW(db$Aliases), " aliases, ",
212                NROW(db$Concepts), " concepts, ",
213                NROW(db$Keywords), " keywords)",
214                domain = NA)
215        flush.console()
216    }
217
218    ## <FIXME>
219    ## No need continuing if there are no objects in the data base.
220    ## But shouldn't we return something of class "hsearch"?
221    if(!length(db$Base)) return(invisible())
222    ## </FIXME>
223
224    ## If agrep is NULL (default), we want to use fuzzy matching iff
225    ## 'pattern' contains no characters special to regular expressions.
226    ## We use the following crude approximation: if pattern contains
227    ## only alphanumeric characters or whitespace or a '-', it is taken
228    ## 'as is', and fuzzy matching is used unless turned off explicitly,
229    ## or pattern has very few (currently, less than 5) characters.
230    if(is.null(fuzzy) || is.na(fuzzy))
231	fuzzy <-
232	    (grepl("^([[:alnum:]]|[[:space:]]|-)+$", pattern)
233	     && (nchar(pattern, type="c") > 4L))
234    if(is.logical(fuzzy)) {
235	if(fuzzy)
236	    max.distance <- 0.1
237    }
238    else if(is.numeric(fuzzy) || is.list(fuzzy)) {
239	max.distance <- fuzzy
240	fuzzy <- TRUE
241    }
242    else
243	stop("incorrect 'agrep' specification")
244
245    dbBase <- db$Base
246    search_fun <- if(fuzzy) {
247        function(x) {
248	    agrep(pattern, x, ignore.case = ignore.case,
249		  max.distance = max.distance)
250        }
251    } else {
252        function(x) {
253            grep(pattern, x, ignore.case = ignore.case,
254                 perl = use_UTF8)
255        }
256    }
257    search_db_results <- function(p, f, e)
258        data.frame(Position = p, Field = f, Entry = e,
259                   stringsAsFactors = FALSE)
260    search_db_field <- function(field) {
261	switch(field,
262	       alias = {
263		   aliases <- db$Aliases$Alias
264                   matched <- search_fun(aliases)
265                   search_db_results(match(db$Aliases$ID[matched],
266                                           dbBase$ID),
267                                     rep.int(field, length(matched)),
268                                     aliases[matched])
269	       },
270	       concept = {
271		   concepts <- db$Concepts$Concept
272                   matched <- search_fun(concepts)
273                   search_db_results(match(db$Concepts$ID[matched],
274                                           dbBase$ID),
275                                     rep.int(field, length(matched)),
276                                     concepts[matched])
277	       },
278	       keyword = {
279		   keywords <- db$Keywords$Keyword
280                   matched <- search_fun(keywords)
281                   search_db_results(match(db$Keywords$ID[matched],
282                                           dbBase$ID),
283                                     rep.int(field, length(matched)),
284                                     keywords[matched])
285	       },
286               ## Alternatively, generically use field mapped to title
287               ## case.
288               name = {
289                   matched <- search_fun(dbBase$Name)
290                   search_db_results(matched,
291                                     rep.int("Name", length(matched)),
292                                     dbBase$Name[matched])
293               },
294               title = {
295                   matched <- search_fun(dbBase$Title)
296                   search_db_results(matched,
297                                     rep.int("Title", length(matched)),
298                                     dbBase$Title[matched])
299               }
300               )
301    }
302
303    matches <- NULL
304    for(f in fields)
305        matches <- rbind(matches, search_db_field(f))
306    matches <- matches[order(matches$Position), ]
307    db <- cbind(dbBase[matches$Position,
308                       c("Topic", "Title", "Name", "ID",
309                         "Package", "LibPath", "Type"),
310                       drop = FALSE],
311                matches[c("Field", "Entry")])
312    rownames(db) <- NULL
313    if(verbose>= 2L) {
314        n_of_objects_matched <- length(unique(db[, "ID"]))
315        message(sprintf(ngettext(n_of_objects_matched,
316                                 "matched %d object.",
317                                 "matched %d objects."),
318                        n_of_objects_matched),
319                domain = NA)
320        flush.console()
321    }
322
323    ## Retval.
324    y <- list(pattern = pattern, fields = fields,
325	      type = if(fuzzy) "fuzzy" else "regexp",
326	      agrep = agrep,
327	      ignore.case = ignore.case, types = types,
328	      package = package, lib.loc = lib.loc,
329	      matches = db)
330    class(y) <- "hsearch"
331    y
332}
333
334hsearch_db <-
335function(package = NULL, lib.loc = NULL,
336         types = getOption("help.search.types"),
337         verbose = getOption("verbose"),
338         rebuild = FALSE, use_UTF8 = FALSE)
339{
340    WINDOWS <- .Platform$OS.type == "windows"
341    if(is.logical(verbose)) verbose <- 2 * as.integer(verbose)
342    if(is.null(lib.loc))
343	lib.loc <- .libPaths()
344    i <- pmatch(types, hsearch_db_types)
345    if (anyNA(i))
346	stop("incorrect type specification")
347    else
348	types <- hsearch_db_types[i]
349
350    db <- eval(.hsearch_db())
351    if(is.null(db))
352	rebuild <- TRUE
353    else if(!rebuild) {
354	## Need to find out whether this has the info we need.
355	## Note that when looking for packages in libraries we always
356	## use the first location found.  Hence if the library search
357	## path changes we might find different versions of a package.
358	## Thus we need to rebuild the hsearch db in case the specified
359	## library path is different from the one used when building the
360	## hsearch db (stored as its "LibPaths" attribute).
361	if(!identical(lib.loc, attr(db, "LibPaths")) ||
362	   anyNA(match(types, attr(db, "Types"))) ||
363	   ## We also need to rebuild the hsearch db in case an existing
364	   ## dir in the library path was modified more recently than
365	   ## the db, as packages might have been installed or removed.
366           any(attr(db, "mtime") < file.mtime(lib.loc[file.exists(lib.loc)])) ||
367	   ## Or if the user changed the locale character type ...
368	   !identical(attr(db, "ctype"), Sys.getlocale("LC_CTYPE"))
369           )
370	    rebuild <- TRUE
371        ## We also need to rebuild if 'packages' was used before and has
372        ## changed.
373        if(!is.null(package) &&
374           any(is.na(match(package, db$Base[, "Package"]))))
375            rebuild <- TRUE
376    }
377    if(rebuild) {
378	if(verbose > 0L) {
379            message("Rebuilding the help.search() database", " ", "...",
380                    if(verbose > 1L) "...", domain = NA)
381            flush.console()
382        }
383
384        want_type_help <- any(types == "help")
385        want_type_demo <- any(types == "demo")
386        want_type_vignette <- any(types == "vignette")
387
388	if(!is.null(package)) {
389	    packages_in_hsearch_db <- package
390            package_paths <- NULL
391	} else {
392            ## local version of .packages(all.available = TRUE),
393            ## recording paths
394            ans <- character(0L); paths <- character(0L)
395            lib.loc <- lib.loc[file.exists(lib.loc)]
396            valid_package_version_regexp <-
397                .standard_regexps()$valid_package_version
398            for (lib in lib.loc) {
399                a <- list.files(lib, all.files = FALSE, full.names = FALSE)
400                for (nam in a) {
401                    pfile <- file.path(lib, nam, "Meta", "package.rds")
402                    if (file.exists(pfile))
403                        info <- readRDS(pfile)$DESCRIPTION[c("Package", "Version")]
404                    else next
405                    if ( (length(info) != 2L) || anyNA(info) ) next
406                    if (!grepl(valid_package_version_regexp, info["Version"])) next
407                    ans <- c(ans, nam)
408                    paths <- c(paths, file.path(lib, nam))
409                }
410            }
411            un <- !duplicated(ans)
412	    packages_in_hsearch_db <-  ans[un]
413            package_paths <- paths[un]
414            names(package_paths) <- ans[un]
415        }
416
417	## Create the hsearch db.
418	np <- 0L
419	if(verbose >= 2L) {
420	    message("Packages {readRDS() sequentially}:", domain = NA)
421            flush.console()
422        }
423        tot <- length(package_paths)
424        incr <- 0L
425        if(verbose && WINDOWS) {
426            pb <- winProgressBar("R: creating the help.search() DB", max = tot)
427            on.exit(close(pb))
428        } else if(verbose == 1L) incr <- ifelse(tot > 500L, 100L, 10L)
429
430	## Starting with R 1.8.0, prebuilt hsearch indices are available
431	## in Meta/hsearch.rds, and the code to build this from the Rd
432	## contents (as obtained from both new and old style Rd indices)
433	## has been moved to tools:::.build_hsearch_index() which
434	## creates a per-package list of base, aliases and keywords
435	## information.	 When building the global index, it seems (see
436	## e.g. also the code in tools:::Rdcontents()), most efficient to
437	## create a list *matrix* (dbMat below), stuff the individual
438	## indices into its rows, and finally create the base, alias,
439	## keyword, and concept information in rbind() calls on the
440	## columns.  This is *much* more efficient than building
441	## incrementally.
442	dbMat <- vector("list", length(packages_in_hsearch_db) * 4L)
443	dim(dbMat) <- c(length(packages_in_hsearch_db), 4L)
444
445        ## Empty hsearch index:
446        hDB0 <- tools:::.build_hsearch_index(NULL)
447
448	for(p in packages_in_hsearch_db) {
449            if(incr && np %% incr == 0L) {
450                message(".", appendLF = FALSE, domain = NA)
451                flush.console()
452            }
453	    np <- np + 1L
454            if(verbose && WINDOWS) setWinProgressBar(pb, np)
455	    if(verbose >= 2L) {
456		message(" ", p, appendLF = ((np %% 5L) == 0L), domain=NA)
457                flush.console()
458            }
459            path <- if(!is.null(package_paths)) package_paths[p]
460	    else find.package(p, lib.loc, quiet = TRUE)
461	    if(length(path) == 0L) {
462                if(is.null(package)) next
463		else stop(packageNotFoundError(p, lib.loc, sys.call()))
464            }
465	    ## Hsearch 'Meta/hsearch.rds' indices were introduced in
466	    ## R 1.8.0.	 If they are missing, we really cannot use
467	    ## the package (as library() will refuse to load it).
468	    ## We always load hsearch.rds to establish the format,
469	    ## sometimes vignette.rds.
470
471            hDB <- NULL
472            if(want_type_help) {
473                if(file.exists(hs_file <-
474                    file.path(path, "Meta", "hsearch.rds"))) {
475                    hDB <- readRDS(hs_file)
476                    if(!is.null(hDB)) {
477                        ## Fill up possibly missing information.
478                        if(is.na(match("Encoding", colnames(hDB[[1L]]))))
479                            hDB[[1L]] <- cbind(hDB[[1L]], Encoding = "")
480                        ## <FIXME>
481                        ## Transition fro old-style to new-style colnames.
482                        ## Remove eventually.
483                        for(i in seq_along(hDB)) {
484                            colnames(hDB[[i]]) <-
485                                tools:::hsearch_index_colnames[[i]]
486                        }
487                        ## </FIXME>
488                    } else if(verbose >= 2L) {
489                        message(gettextf("package %s has empty hsearch data - strangely",
490                                         sQuote(p)),
491                                domain = NA)
492                        flush.console()
493                    }
494                } else if(!is.null(package))
495                      warning("no hsearch.rds meta data for package ", p,
496                              domain = NA)
497            }
498            if(is.null(hDB))
499                hDB <- hDB0
500            nh <- NROW(hDB[[1L]])
501            hDB[[1L]] <- cbind(hDB[[1L]], Type = rep.int("help", nh))
502            if(nh)
503                hDB[[1L]][, "LibPath"] <- path
504            if(want_type_vignette)
505                hDB <- merge_vignette_index(hDB, path, p)
506            if(want_type_demo)
507                hDB <- merge_demo_index(hDB, path, p)
508            ## Put the hsearch index for the np-th package into the
509            ## np-th row of the matrix used for aggregating.
510            dbMat[np, seq_along(hDB)] <- hDB
511	}
512
513	if(verbose >= 2L)  {
514	    message(ifelse(np %% 5L == 0L, "\n", "\n\n"),
515                    sprintf("Built dbMat[%d,%d]", nrow(dbMat), ncol(dbMat)),
516                    domain = NA)
517            flush.console()
518            ## DEBUG save(dbMat, file="~/R/hsearch_dbMat.rda", compress=TRUE)
519        }
520
521	## Create the global base, aliases, keywords and concepts tables
522	## via calls to rbind() on the columns of the matrix used for
523	## aggregating.
524	db <- list(Base     = do.call("rbind", dbMat[, 1]),
525		   Aliases  = do.call("rbind", dbMat[, 2]),
526		   Keywords = do.call("rbind", dbMat[, 3]),
527		   Concepts = do.call("rbind", dbMat[, 4]))
528        rownames(db$Base) <- NULL
529        ## <FIXME>
530        ## Remove eventually ...
531	if(is.null(db$Concepts)) {
532	    db$Concepts <-
533                matrix(character(), ncol = 3L,
534                       dimnames =
535                           list(NULL,
536                                tools:::hsearch_index_colnames$Concepts))
537        }
538        ## </FIXME>
539
540        ## Make the IDs globally unique by prefixing them with the
541	## number of the package in the global index.
542	for(i in which(vapply(db, NROW, 0L) > 0L)) {
543	    db[[i]][, "ID"] <-
544		paste(rep.int(seq_along(packages_in_hsearch_db),
545			      vapply(dbMat[, i], NROW, 0L)),
546		      db[[i]][, "ID"],
547		      sep = "/")
548	}
549	## And maybe re-encode ...
550	if(!identical(Sys.getlocale("LC_CTYPE"), "C")) {
551	    if(verbose >= 2L) {
552                message("reencoding ...", appendLF = FALSE, domain = NA)
553                flush.console()
554            }
555	    encoding <- db$Base[, "Encoding"]
556            target <- ifelse(use_UTF8 && !l10n_info()$`UTF-8`, "UTF-8", "")
557	    ## As iconv is not vectorized in the 'from' argument, loop
558	    ## over groups of identical encodings.
559	    for(enc in unique(encoding)) {
560                if(enc != target) next
561		IDs <- db$Base[encoding == enc, "ID"]
562		for(i in seq_along(db)) {
563		    ind <- db[[i]][, "ID"] %in% IDs
564		    db[[i]][ind, ] <- iconv(db[[i]][ind, ], enc, "")
565		}
566	    }
567	    if(verbose >= 2L) {
568                message(" ", "done", domain = NA)
569                flush.console()
570            }
571	}
572	bad_IDs <-
573	    unlist(lapply(db,
574			  function(u)
575                              u[rowSums(is.na(nchar(u, "chars",
576                                                    allowNA = TRUE,
577                                                    keepNA = FALSE))) > 0,
578                                "ID"]))
579        ## FIXME: drop this fallback
580	if(length(bad_IDs)) {           # try latin1
581            for(i in seq_along(db)) {
582                ind <- db[[i]][, "ID"] %in% bad_IDs
583                db[[i]][ind, ] <- iconv(db[[i]][ind, ], "latin1", "")
584            }
585            bad_IDs <-
586                unlist(lapply(db,
587                              function(u)
588                                  u[rowSums(is.na(nchar(u, "chars",
589                                                        allowNA = TRUE,
590                                                        keepNA = FALSE))) > 0,
591                                    "ID"]))
592        }
593	## If there are any invalid multi-byte character data
594	## left, we simple remove all Rd objects with at least one
595	## invalid entry, and warn.
596        if(length(bad_IDs)) {
597	    warning("removing all entries with invalid multi-byte character data")
598	    for(i in seq_along(db)) {
599		ind <- db[[i]][, "ID"] %in% bad_IDs
600		db[[i]] <- db[[i]][!ind, ]
601	    }
602	}
603
604        ## Drop entries without topic as these cannot be accessed.
605        ## (These come from help pages without \alias.)
606        bad_IDs <- db$Base[is.na(db$Base[, "Topic"]), "ID"]
607        if(length(bad_IDs)) {
608	    for(i in seq_along(db)) {
609		ind <- db[[i]][, "ID"] %in% bad_IDs
610		db[[i]] <- db[[i]][!ind, ]
611	    }
612	}
613
614        ## Remove keywords which are empty.
615        ind <- nzchar(db$Keywords[, "Keyword"])
616        db$Keywords <- db$Keywords[ind, , drop = FALSE]
617        ## Remove concepts which are empty.
618        ind <- nzchar(db$Concepts[, "Concept"])
619        db$Concepts <- db$Concepts[ind, , drop = FALSE]
620
621        ## Map non-standard keywords to concepts, and use the
622        ## descriptions of the standard keywords as concepts, with the
623        ## exception of keyword 'internal'.
624        standard <- .get_standard_Rd_keywords_with_descriptions()
625        keywords <- standard$Keywords
626        concepts <- standard$Descriptions
627        pos <- match(db$Keywords[, "Keyword"], keywords)
628        ind <- !is.na(pos) & (keywords[pos] != "internal")
629        db$Concepts <-
630            rbind(db$Concepts,
631                  db$Keywords[is.na(pos), , drop = FALSE],
632                  cbind(concepts[pos[ind]],
633                        db$Keywords[ind, -1L, drop = FALSE]))
634        db$Keywords <- db$Keywords[!is.na(pos), , drop = FALSE]
635
636        ## Doing this earlier will not work: in particular, re-encoding
637        ## is written for character matrices.
638        db <- lapply(db, as.data.frame,
639                     stringsAsFactors = FALSE, row.names = NULL)
640
641        if(verbose >= 2L) {
642            message("saving the database ...", appendLF = FALSE, domain = NA)
643            flush.console()
644        }
645        attr(db, "LibPaths") <- lib.loc
646        attr(db, "mtime") <- Sys.time()
647        attr(db, "ctype") <- Sys.getlocale("LC_CTYPE")
648        attr(db, "Types") <- unique(c("help", types))
649        class(db) <- "hsearch_db"
650        .hsearch_db(db)
651        if(verbose >= 2L) {
652            message(" ", "done", domain = NA)
653            flush.console()
654        }
655        if(verbose > 0L) {
656            message("... database rebuilt", domain = NA)
657            if(WINDOWS) {
658                close(pb)
659                on.exit()               # clear closing of progress bar
660            }
661            flush.console()
662        }
663    }
664
665    db
666}
667
668## Cf. tools:::.get_standard_Rd_keywords().
669.get_standard_Rd_keywords_with_descriptions <-
670function()
671{
672    lines <- readLines(file.path(R.home("doc"), "KEYWORDS.db"))
673    ## Strip top-level entries.
674    lines <- grep("^.*\\|([^:]*):.*", lines, value = TRUE)
675    ## Strip comments.
676    lines <- sub("[[:space:]]*#.*", "", lines)
677    list(Keywords = sub("^.*\\|([^:]*):.*", "\\1", lines),
678         Descriptions = sub(".*:[[:space:]]*", "", lines))
679}
680
681## This extra indirection allows the Mac GUI to replace this
682## yet call the printhsearchInternal function.
683print.hsearch <-
684function(x, ...)
685    printhsearchInternal(x, ...)
686
687printhsearchInternal <-
688function(x, ...)
689{
690    help_type <- getOption("help_type", default = "text")
691    types <- x$types
692    if (help_type == "html") {
693        browser <- getOption("browser")
694        port <- tools::startDynamicHelp(NA)
695	if (port > 0L) {
696            tools:::.httpd_objects(port, x)
697            url <- sprintf("http://127.0.0.1:%d/doc/html/Search?objects=1&port=%d",
698                           port, port)
699            ## <NOTE>
700            ## Older versions used the following, which invokes the
701            ## dynamic HTML help system in a way that this calls
702            ## help.search() to give the results to be displayed.
703            ## This is now avoided by passing the (already available)
704            ## results to the dynamic help system using the dynamic
705            ## variable .httpd_objects().
706	    ## url <-
707            ##     paste0("http://127.0.0.1:", port,
708            ##            "/doc/html/Search?pattern=",
709            ##            tools:::escapeAmpersand(x$pattern),
710            ##            paste0("&fields.", x$fields, "=1",
711            ##                   collapse = ""),
712            ##            if (!is.null(x$agrep)) paste0("&agrep=", x$agrep),
713            ##            if (!x$ignore.case) "&ignore.case=0",
714            ##            if (!identical(types,
715            ##                           getOption("help.search.types")))
716            ##                paste0("&types.", types, "=1",
717            ##                       collapse = ""),
718            ##            if (!is.null(x$package))
719            ##                paste0("&package=",
720            ##                       paste(x$package, collapse=";")),
721            ##            if (!identical(x$lib.loc, .libPaths()))
722            ##                paste0("&lib.loc=",
723            ##                       paste(x$lib.loc, collapse=";"))
724            ##            )
725            ## </NOTE>
726            browseURL(url, browser)
727            return(invisible(x))
728        }
729    }
730    hfields <- paste(x$fields, collapse = " or ")
731    vfieldnames <-
732        c(alias = "name", concept = "keyword", keyword = NA,
733          name = "name", title = "title")
734    vfieldnames <- vfieldnames[x$fields]
735    vfields <- paste(unique(vfieldnames[!is.na(vfieldnames)]),
736                     collapse = " or ")
737    dfieldnames <-
738        c(alias = "name", concept = NA, keyword = NA,
739          name = "name", title = "title")
740    dfieldnames <- dfieldnames[x$fields]
741    dfields <- paste(unique(dfieldnames[!is.na(dfieldnames)]),
742                     collapse = " or ")
743    fields_used <-
744        list(help = hfields, vignette = vfields, demo = dfields)
745    matchtype <- switch(x$type, fuzzy = "fuzzy", "regular expression")
746    typenames <-
747        c(vignette = "Vignettes", help = "Help files", demo = "Demos")
748    fields_for_match_details <-
749        list(help = c("alias", "concept", "keyword"),
750             vignette = c("concept"),
751             demo = character())
752    field_names_for_details <-
753        c(alias = "Aliases", concept = "Concepts", keyword = "Keywords")
754
755    db <- x$matches
756    if(NROW(db) == 0) {
757    	typenames <- paste(tolower(typenames[types]), collapse= " or ")
758	writeLines(strwrap(paste("No", typenames,
759                                 "found with", fields_used$help,
760				 "matching", sQuote(x$pattern),
761				 "using", matchtype,
762                                 "matching.")))
763        return(invisible(x))
764    }
765
766    outFile <- tempfile()
767    outConn <- file(outFile, open = "w")
768    typeinstruct <-
769        c(vignette =
770              paste("Type 'vignette(\"FOO\", package=\"PKG\")' to",
771                    "inspect entries 'PKG::FOO'."),
772          help =
773              paste("Type '?PKG::FOO' to",
774                    "inspect entries 'PKG::FOO',",
775                    "or 'TYPE?PKG::FOO' for entries like",
776                    "'PKG::FOO-TYPE'."),
777          demo =
778              paste("Type 'demo(PKG::FOO)' to",
779                    "run demonstration 'PKG::FOO'."))
780
781    for(type in types) {
782	if(NROW(dbtemp <- db[db[, "Type"] == type, , drop = FALSE]) > 0) {
783	    writeLines(c(strwrap(paste(typenames[type], "with",
784                                       fields_used[[type]], "matching",
785                                       sQuote(x$pattern), "using",
786                                       matchtype, "matching:")),
787			 "\n"),
788		       outConn)
789            fields <- fields_for_match_details[[type]]
790            chunks <- split.data.frame(dbtemp,
791                                       paste0(dbtemp[, "Package"],
792                                              "::",
793                                              dbtemp[ , "Topic"]))
794            nms <- names(chunks)
795            for(i in seq_along(nms)) {
796                chunk <- chunks[[i]]
797                writeLines(formatDL(nms[i], chunk[1L, "Title"]),
798                           outConn)
799                matches <- Filter(length,
800                                  split(chunk[, "Entry"],
801                                        chunk[, "Field"])[fields])
802                if(length(matches)) {
803                    tags <- field_names_for_details[names(matches)]
804                    vals <- vapply(matches, paste, "", collapse = ", ")
805                    writeLines(strwrap(paste0(tags, ": ", vals),
806                                       indent = 2L, exdent = 4L),
807                               outConn)
808                }
809            }
810	    writeLines(c("\n",
811			 strwrap(typeinstruct[type]),
812			 "\n\n"),
813		       outConn)
814	}
815    }
816    close(outConn)
817    file.show(outFile, delete.file = TRUE)
818    invisible(x)
819}
820
821hsearch_db_concepts <-
822function(db = hsearch_db())
823{
824    ## <NOTE>
825    ## This should perhaps get an ignore.case = TRUE argument.
826    ## </NOTE>
827    pos <- match(db$Concepts[, "ID"], db$Base[, "ID"])
828    entries <- split(as.data.frame(db$Base[pos, ],
829                                   stringsAsFactors = FALSE),
830                     db$Concepts[, "Concept"])
831    enums <- vapply(entries, NROW, 0L)
832    pnums <- vapply(entries, function(e) length(unique(e$Package)), 0L)
833    pos <- order(enums, pnums, decreasing = TRUE)
834    data.frame(Concept = names(entries)[pos],
835               Frequency = enums[pos],
836               Packages = pnums[pos],
837               stringsAsFactors = FALSE,
838               row.names = NULL)
839}
840
841hsearch_db_keywords <-
842function(db = hsearch_db())
843{
844    pos <- match(db$Keywords[, "ID"], db$Base[, "ID"])
845    entries <- split(as.data.frame(db$Base[pos, ],
846                                   stringsAsFactors = FALSE),
847                     db$Keywords[, "Keyword"])
848    enums <- vapply(entries, NROW, 0L)
849    pnums <- vapply(entries, function(e) length(unique(e$Package)), 0L)
850    standard <- .get_standard_Rd_keywords_with_descriptions()
851    concepts <- standard$Descriptions[match(names(entries),
852                                            standard$Keywords)]
853    pos <- order(enums, pnums, decreasing = TRUE)
854    data.frame(Keyword = names(entries)[pos],
855               Concept = concepts[pos],
856               Frequency = enums[pos],
857               Packages = pnums[pos],
858               stringsAsFactors = FALSE,
859               row.names = NULL)
860}
861
862print.hsearch_db <-
863function(x, ...)
864{
865    writeLines(c("A help search database:",
866                 sprintf("Objects: %d, Aliases: %d, Keywords: %d, Concepts: %d",
867                         NROW(x$Base),
868                         NROW(x$Aliases),
869                         NROW(x$Keywords),
870                         NROW(x$Concepts))))
871    invisible(x)
872}
873