utils/R/aspell.R

#  File src/library/utils/R/aspell.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2019 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

aspell <-
function(files, filter, control = list(), encoding = "unknown",
         program = NULL, dictionaries = character())
{
    ## Take the given files and feed them through spell checker in
    ## Ispell pipe mode.

    ## Think about options and more command line options eventually.

    program <- aspell_find_program(program)
    if(is.na(program))
        stop("No suitable spell-checker program found")

    ## Be nice.
    if(inherits(files, "Rd"))
        files <- list(files)

    files_are_names <- is.character(files)

    filter_args <- list()
    if(missing(filter) || is.null(filter)) {
        filter <- if(!files_are_names) {
            function(ifile, encoding) {
                if(inherits(ifile, "srcfile"))
                    readLines(ifile$filename, encoding = encoding,
                              warn = FALSE)
                else if(inherits(ifile, "connection"))
                    readLines(ifile, encoding = encoding, warn = FALSE)
                else {
                    ## What should this do with encodings?
                    as.character(ifile)
                }
            }
        }
        else NULL
    }
    else if(is.character(filter)) {
        ## Look up filter in aspell filter db.
        filter_name <- filter[1L]
        filter <- aspell_filter_db[[filter_name]]
        ## Warn if the filter was not found in the db.
        if(is.null(filter))
            warning(gettextf("Filter '%s' is not available.",
                             filter_name),
                    domain = NA)
    }
    else if(is.list(filter)) {
        ## Support
        ##   list("Rd", drop = "\\references"
        ## at least for now.
        filter_name <- filter[[1L]][1L]
        filter_args <- filter[-1L]
        filter <- aspell_filter_db[[filter_name]]
        ## Warn if the filter was not found in the db.
        if(is.null(filter))
            warning(gettextf("Filter '%s' is not available.",
                             filter_name),
                    domain = NA)
    }
    else if(!is.function(filter))
        stop("Invalid 'filter' argument.")

    encoding <- rep_len(encoding, length(files))

    verbose <- getOption("verbose")

    db <- data.frame(Original = character(), File = character(),
                     Line = integer(), Column = integer(),
                     stringsAsFactors = FALSE)
    db$Suggestions <- list()

    tfile <- tempfile("aspell")
    on.exit(unlink(tfile))

    if(length(dictionaries)) {
        paths <- aspell_find_dictionaries(dictionaries)
        ind <- paths == ""
        if(any(ind)) {
            warning(gettextf("The following dictionaries were not found:\n%s",
                             paste(sprintf("  %s", dictionaries[ind]),
                                   collapse = "\n")),
                    domain = NA)
            paths <- paths[!ind]
        }
        if(length(paths)) {
            words <- unlist(lapply(paths, readRDS), use.names = FALSE)
            personal <- tempfile("aspell_personal")
            on.exit(unlink(personal), add = TRUE)
            ## <FIXME>
            ## How can we get the right language set (if needed)?
            ## Maybe aspell() needs an additional 'language' arg?
            aspell_write_personal_dictionary_file(words, personal,
                                                  program = program)
            ## </FIXME>
            control <- c(control, "-p", shQuote(personal))
        }
    }

    ## No special expansion of control argument for now.
    control <- as.character(control)

    fnames <- names(files)
    files <- as.list(files)

    for (i in seq_along(files)) {

        file <- files[[i]]
        if(files_are_names)
            fname <- file
        else {
            ## Try srcfiles and srcrefs ...
            fname <- if(inherits(file, "srcfile"))
                file$filename
            else
                attr(attr(file, "srcref"), "srcfile")$filename
            ## As a last resort, try the names of the files argument.
            if(is.null(fname))
                fname <- fnames[i]
            ## If unknown ...
            if(is.null(fname))
                fname <- "<unknown>"
        }

        enc <- encoding[i]

        if(verbose)
            message(gettextf("Processing file %s", fname),
                    domain = NA)

        lines <- if(is.null(filter))
            readLines(file, encoding = enc, warn = FALSE)
        else {
            ## Assume that filter takes an input file (and additional
            ## arguments) and return a character vector.
            do.call(filter, c(list(file, encoding = enc), filter_args))
        }

        ## Allow filters to pass additional control arguments, in case
        ## these need to be inferred from the file contents.
        control <- c(control, attr(lines, "control"))

        ## Need to escape all lines with carets to ensure Aspell handles
        ## them as data: the Aspell docs say
        ##   It is recommended that programmatic interfaces prefix every
        ##   data line with an uparrow to protect themselves against
        ##   future changes in Aspell.
        writeLines(paste0("^", lines), tfile)
        ## Note that this re-encodes character strings with marked
        ## encodings to the current encoding (which is definitely fine
        ## if this is UTF-8 and Aspell was compiled with full UTF-8
        ## support).  Alternatively, we could try using something along
        ## the lines of
        ##   writeLines(paste0("^", lines), tfile,
        ##              useBytes = TRUE)
        ## and pass the encoding info to Aspell in case we know it.

        out <- tools:::.system_with_capture(program, c("-a", control),
                                            stdin = tfile)

	if(out$status != 0L)
	    stop(gettextf("Running aspell failed with diagnostics:\n%s",
			  paste(out$stderr, collapse = "\n")),
                 domain = NA)

	## Hopefully everything worked ok.
	lines <- out$stdout[-1L]
	pos <- cumsum(lines == "") + 1L

	## Format is as follows.
	## First line is a header.
	## Blank lines separate the results for each line.
	## Results for the word on each line are given as follows.
	## * If the word was found in the main dictionary, or your personal
	##   dictionary, then the line contains only a `*'.
	## * If the word is not in the dictionary, but there are
	##   suggestions, then the line contains an `&', a space, the
	##   misspelled word, a space, the number of near misses, the number
	##   of characters between the beginning of the line and the
	##   beginning of the misspelled word, a colon, another space, and a
	##   list of the suggestions separated by commas and spaces.
	## * If the word does not appear in the dictionary, and there are no
	##   suggestions, then the line contains a `#', a space, the
	##   misspelled word, a space, and the character offset from the
	##   beginning of the line.
	## This can be summarized as follows:
	##   OK: *
	##   Suggestions: & original count offset: miss, miss, ...
	##   None: # original offset

	## Look at words not in dictionary with suggestions.

	if(any(ind <- startsWith(lines, "&"))) {
	    info <- strsplit(lines[ind], ": ", fixed = TRUE)
	    one <- strsplit(sapply(info, `[`, 1L), " ",  fixed = TRUE)
	    two <- strsplit(sapply(info, `[`, 2L), ", ", fixed = TRUE)
	    db1 <- data.frame(Original =
			      as.character(sapply(one, `[`, 2L)),
			      File = fname,
			      Line = pos[ind],
			      Column =
			      as.integer(sapply(one, `[`, 4L)),
			      stringsAsFactors = FALSE)
	    db1$Suggestions <- two
	    db <- rbind(db, db1)
	}
	## Looks at words not in dictionary with no suggestions.
	if(any(ind <- startsWith(lines, "#"))) {
	    one <- strsplit(lines[ind], " ", fixed = TRUE)
	    db1 <- data.frame(Original =
			      as.character(sapply(one, `[`, 2L)),
			      File = fname,
			      Line = pos[ind],
			      Column =
			      as.integer(sapply(one, `[`, 3L)),
			      stringsAsFactors = FALSE)
	    db1$Suggestions <- vector("list", length(one))
	    db <- rbind(db, db1)
	}
    }

    class(db) <- c("aspell", "data.frame")
    db
}

format.aspell <-
function(x, sort = TRUE, verbose = FALSE, indent = 2L, ...)
{
    if(!nrow(x)) return(character())

    if(sort)
        x <- x[order(x$Original, x$File, x$Line, x$Column), ]

    from <- split(sprintf("%s:%d:%d", x$File, x$Line, x$Column),
                  x$Original)

    if(verbose) {
        unlist(Map(function(w, f, s) {
            sprintf("Word: %s\nFrom: %s\n%s",
                    w,
                    paste0(c("", rep.int("      ", length(f) - 1L)),
                           f, collapse = "\n"),
                    paste(strwrap(paste("Suggestions:",
                                        paste(s[[1L]], collapse = " ")),
                                  exdent = 6L, indent = 0L),
                          collapse = "\n"))
        },
                   names(from),
                   from,
                   split(x$Suggestions, x$Original)))
    } else {
        sep <- sprintf("\n%s", strrep(" ", indent))
        paste(names(from),
              vapply(from, paste, "", collapse = sep),
              sep = sep)
    }
}

print.aspell <-
function(x, ...)
{
    if(nrow(x))
        writeLines(paste(format(x, ...), collapse = "\n\n"))
    invisible(x)
}

summary.aspell <-
function(object, ...)
{
    words <- sort(unique(object$Original))
    if(length(words)) {
        writeLines("Possibly mis-spelled words:")
        print(words)
    }
    invisible(words)
}

aspell_filter_db <- new.env(hash = FALSE) # small
aspell_filter_db$Rd <- tools::RdTextFilter
aspell_filter_db$Sweave <- tools::SweaveTeXFilter

aspell_find_program <-
function(program = NULL)
{
    check <- !is.null(program) || !is.null(names(program))
    if(is.null(program))
        program <- getOption("aspell_program")
    if(is.null(program))
        program <- c("aspell", "hunspell", "ispell")
    program <- Filter(nzchar, Sys.which(program))[1L]
    if(!is.na(program) && check) {
        out <- c(system(sprintf("%s -v", program),
                        intern = TRUE), "")[1L]
        if(grepl("really Aspell", out))
            names(program) <- "aspell"
        else if(grepl("really Hunspell", out))
            names(program) <- "hunspell"
        else if(grepl("International Ispell", out))
            names(program) <- "ispell"
        else
            names(program) <- NA_character_
    }
    program
}

aspell_dictionaries_R <- "en_stats"

aspell_find_dictionaries <-
function(dictionaries, dirnames = character())
{
    dictionaries <- as.character(dictionaries)
    if(!(n <- length(dictionaries))) return(character())

    ## Always search the R system dictionary directory first.
    dirnames <- c(file.path(R.home("share"), "dictionaries"), dirnames)

    ## For now, all dictionary files should be .rds files.
    if(any(ind <- !endsWith(dictionaries, ".rds")))
        dictionaries[ind] <- sprintf("%s.rds", dictionaries[ind])

    out <- character(n)
    ## Dictionaries with no path separators are looked for in the given
    ## dictionary directories (by default, the R system dictionary
    ## directory).
    ind <- grepl(.Platform$file.sep, dictionaries, fixed = TRUE)
    ## (Equivalently, could check where paths == basename(paths).)
    if(length(pos <- which(ind))) {
        pos <- pos[file_test("-f", dictionaries[pos])]
        out[pos] <- normalizePath(dictionaries[pos], "/")
    }
    if(length(pos <- which(!ind))) {
        out[pos] <- find_files_in_directories(dictionaries[pos],
                                              dirnames)
    }
    out
}

### Utilities.

aspell_inspect_context <-
function(x)
{
    x <- split(x, x$File)
    y <- Map(function(f, x) {
        lines <- readLines(f, warn = FALSE)[x$Line]
        cbind(f,
              x$Line,
              substring(lines, 1L, x$Column - 1L),
              x$Original,
              substring(lines, x$Column + nchar(x$Original)))
    },
             names(x), x)
    y <- data.frame(do.call(rbind, y), stringsAsFactors = FALSE)
    names(y) <- c("File", "Line", "Left", "Original", "Right")
    class(y) <- c("aspell_inspect_context", "data.frame")
    y
}

print.aspell_inspect_context <-
function(x, ...)
{
    s <- split(x, x$File)
    nms <- names(s)
    for(i in seq_along(s)) {
        e <- s[[i]]
        writeLines(c(sprintf("File '%s':", nms[i]),
                     sprintf("  Line %s: \"%s\", \"%s\", \"%s\"",
                             format(e$Line),
                             gsub("\"", "\\\"", e$Left ), e$Original,
                             gsub("\"", "\\\"", e$Right)),
                     ""))
    }
    invisible(x)
}


## For spell-checking the R manuals:

## This can really only be done with Aspell as the other checkers have
## no texinfo mode.

aspell_control_R_manuals <-
    list(aspell =
         c("--master=en_US",
           "--add-extra-dicts=en_GB",
           "--mode=texinfo",
           "--add-texinfo-ignore=acronym",
           "--add-texinfo-ignore=deftypefun",
           "--add-texinfo-ignore=deftypefunx",
           "--add-texinfo-ignore=findex",
           "--add-texinfo-ignore=enindex",
           "--add-texinfo-ignore=include",
           "--add-texinfo-ignore=ifclear",
           "--add-texinfo-ignore=ifset",
           "--add-texinfo-ignore=math",
           "--add-texinfo-ignore=macro",
           "--add-texinfo-ignore=multitable",
           "--add-texinfo-ignore=node",
           "--add-texinfo-ignore=pkg",
           "--add-texinfo-ignore=printindex",
           "--add-texinfo-ignore=set",
           "--add-texinfo-ignore=vindex",
           "--add-texinfo-ignore-env=menu",
           "--add-texinfo-ignore=CRANpkg"
           ),
         hunspell =
         c("-d en_US,en_GB"))

aspell_R_manuals <-
function(which = NULL, dir = NULL, program = NULL,
         dictionaries = aspell_dictionaries_R)
{
    if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
    ## Allow specifying 'R-exts' and alikes, or full paths.
    files <- if(is.null(which)) {
        Sys.glob(file.path(dir, "doc", "manual", "*.texi"))
    } else {
        ind <- which(which ==
                     basename(tools::file_path_sans_ext(which)))
        which[ind] <-
            file.path(dir, "doc", "manual",
                      sprintf("%s.texi", which[ind]))
        which
    }

    program <- aspell_find_program(program)

    aspell(files,
           control = aspell_control_R_manuals[[names(program)]],
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking the R Rd files:

aspell_control_R_Rd_files <-
    list(aspell =
         c("--master=en_US",
           "--add-extra-dicts=en_GB"),
         hunspell =
         c("-d en_US,en_GB"))

aspell_R_Rd_files <-
function(which = NULL, dir = NULL, drop = "\\references",
         program = NULL, dictionaries = aspell_dictionaries_R)
{
    files <- character()

    if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()

    if(is.null(which)) {
        which <- tools:::.get_standard_package_names()$base
        # CHANGES.Rd could be dropped from checks in the future;
        # it will not be updated post 2.15.0
        files <- c(file.path(dir, "doc", "NEWS.Rd"),
                   file.path(dir, "src", "gnuwin32", "CHANGES.Rd"))
        files <- files[file_test("-f", files)]
    }

    files <-
        c(files,
          unlist(lapply(file.path(dir, "src", "library", which, "man"),
                        tools::list_files_with_type,
                        "docs", OS_subdirs = c("unix", "windows")),
                 use.names = FALSE))

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("Rd", drop = drop),
           control = aspell_control_R_Rd_files[[names(program)]],
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking Rd files in a package:

aspell_package_Rd_files <-
function(dir, drop = c("\\author", "\\references"),
         control = list(), program = NULL, dictionaries = character())
{
    dir <- normalizePath(dir, "/")

    subdir <- file.path(dir, "man")
    files <- if(dir.exists(subdir))
        tools::list_files_with_type(subdir,
                                    "docs",
                                    OS_subdirs = c("unix", "windows"))
    else character()

    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    defaults <- .aspell_package_defaults(dir, encoding)$Rd_files
    if(!is.null(defaults)) {
        ## Direct settings currently override (could add a list add =
        ## TRUE mechanism eventually).
        if(!is.null(d <- defaults$drop))
            drop <- d
        if(!is.null(d <- defaults$control))
            control <- d
        if(!is.null(d <- defaults$program))
            program <- d
        if(!is.null(d <- defaults$dictionaries)) {
            dictionaries <-
                aspell_find_dictionaries(d, file.path(dir, ".aspell"))
        }
        ## <FIXME>
        ## Deprecated in favor of specifying R level dictionaries.
        ## Maybe give a warning (in particular if both are given)?
        if(!is.null(d <- defaults$personal))
            control <- c(control,
                         sprintf("-p %s",
                                 shQuote(file.path(dir, ".aspell", d))))
        ## </FIXME>
    }

    macros <- tools::loadPkgRdMacros(dir,
                                     macros = file.path(R.home("share"),
                                                        "Rd", "macros",
                                                        "system.Rd"))

    aspell(files,
           filter = list("Rd", drop = drop, macros = macros),
           control = control,
           encoding = encoding,
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking the R vignettes:

## This should really be done with Aspell as the other checkers have far
## less powerful TeX modes.

aspell_control_R_vignettes <-
    list(aspell =
         c("--mode=tex",
           "--master=en_US",
           "--add-extra-dicts=en_GB",
           "--add-tex-command='code p'",
           "--add-tex-command='pkg p'",
           "--add-tex-command='CRANpkg p'"
           ),
         hunspell =
         c("-t", "-d en_US,en_GB"))

aspell_R_vignettes <-
function(program = NULL, dictionaries = aspell_dictionaries_R)
{
    files <- Sys.glob(file.path(tools:::.R_top_srcdir_from_Rd(),
                                "src", "library", "*", "vignettes",
                                "*.Rnw"))

    program <- aspell_find_program(program)

    aspell(files,
           filter = "Sweave",
           control = aspell_control_R_vignettes[[names(program)]],
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking vignettes in a package:

## This should really be done with Aspell as the other checkers have far
## less powerful TeX modes.

aspell_control_package_vignettes <-
    list(aspell =
         c("--add-tex-command='citep oop'",
           "--add-tex-command='Sexpr p'",
           "--add-tex-command='code p'",
           "--add-tex-command='pkg p'",
           "--add-tex-command='proglang p'",
           "--add-tex-command='samp p'"
           ))

aspell_package_vignettes <-
function(dir,
         control = list(), program = NULL, dictionaries = character())
{
    dir <- tools::file_path_as_absolute(dir)

    vinfo <- tools::pkgVignettes(dir = dir)
    files <- vinfo$docs
    if(!length(files)) return(aspell(character()))

    ## We need the package encoding to read the defaults file ...
    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    defaults <- .aspell_package_defaults(dir, encoding)$vignettes
    if(!is.null(defaults)) {
        if(!is.null(d <- defaults$control))
            control <- d
        if(!is.null(d <- defaults$program))
            program <- d
        if(!is.null(d <- defaults$dictionaries)) {
            dictionaries <-
                aspell_find_dictionaries(d, file.path(dir, ".aspell"))
        }
        ## <FIXME>
        ## Deprecated in favor of specifying R level dictionaries.
        ## Maybe give a warning (in particular if both are given)?
        if(!is.null(d <- defaults$personal))
            control <- c(control,
                         sprintf("-p %s",
                                 shQuote(file.path(dir, ".aspell", d))))
        ## </FIXME>
    }

    program <- aspell_find_program(program)

    fgroups <- split(files, vinfo$engines)
    egroups <- split(vinfo$encodings, vinfo$engines)

    do.call(rbind,
            Map(function(fgroup, egroup, engine) {
                engine <- tools::vignetteEngine(engine)
                aspell(fgroup,
                       filter = engine$aspell$filter,
                       control =
                       c(engine$aspell$control,
                         aspell_control_package_vignettes[[names(program)]],
                         control),
                       encoding = egroup,
                       program = program,
                       dictionaries = dictionaries)
            },
                fgroups,
                egroups,
                names(fgroups)
                )
            )
}

## Spell-checking R files.

aspell_filter_db$R <-
function(ifile, encoding = "unknown", ignore = character())
{
    pd <- get_parse_data_for_message_strings(ifile, encoding)
    if(is.null(pd) || !NROW(pd)) return(character())

    ## Strip the string delimiters.
    pd$text <- substring(pd$text, 2L, nchar(pd$text) - 1L)
    ## Replace whitespace C backslash escape sequences by whitespace.
    pd$text <- gsub("(^|[^\\])\\\\[fnrt]", "\\1  ", pd$text)
    pd$text <- gsub(  "([^\\])\\\\[fnrt]", "\\1  ", pd$text)
    ## (Do this twice for now because in e.g.
    ##    \n\t\tInformation on package %s
    ## the first \t is not matched the first time.  Alternatively, we
    ## could match with
    ##    (^|[^\\])((\\\\[fnrt])+)
    ## but then computing the replacement (\\1 plus as many blanks as
    ## the characters in \\2) is not straightforward.
    ## For gettextf() calls, replace basic percent escape sequences by
    ## whitespace.
    ind <- pd$caller == "gettextf"
    if(any(ind)) {
        pd$text[ind] <-
            gsub("(^|[^%])%[dioxXfeEgGaAs]", "\\1  ", pd$text[ind])
        pd$text[ind] <-
            gsub("  ([^%])%[dioxXfeEgGaAs]", "\\1  ", pd$text[ind])
        ## (See above for doing this twice.)
    }

    lines <- readLines(ifile, encoding = encoding, warn = FALSE)

    ## Column positions in the parse data have tabs expanded to tab
    ## stops using a tab width of 8, so for lines with tabs we need to
    ## map the column positions back to character positions.
    lines_in_pd <- sort(unique(c(pd$line1, pd$line2)))
    tab <- Map(function(tp, nc) {
        if(tp[1L] == -1L) return(NULL)
        widths <- rep.int(1, nc)
        for(i in tp) {
            cols <- cumsum(widths)
            widths[i] <- 8 - (cols[i] - 1) %% 8
        }
        cumsum(widths)
    },
               gregexpr("\t", lines[lines_in_pd], fixed = TRUE),
               nchar(lines[lines_in_pd]))
    names(tab) <- lines_in_pd

    lines[lines_in_pd] <- gsub("[^\t]", " ", lines[lines_in_pd])
    lines[-lines_in_pd] <- ""

    for(entry in split(pd, seq_len(NROW(pd)))) {
        line1 <- entry$line1
        line2 <- entry$line2
        col1 <- entry$col1
        col2 <- entry$col2
        if(line1 == line2) {
            if(length(ptab <- tab[[as.character(line1)]])) {
                col1 <- which(ptab == col1) + 1L
                col2 <- which(ptab == col2) - 1L
            }
            substring(lines[line1], col1, col2) <- entry$text
        } else {
            texts <- unlist(strsplit(entry$text, "\n", fixed = TRUE))
            n <- length(texts)
            if(length(ptab <- tab[[as.character(line1)]])) {
                col1 <- which(ptab == col1) + 1L
            }
            substring(lines[line1], col1) <- texts[1L]
            pos <- seq.int(from = 2L, length.out = n - 2L)
            if(length(pos))
                lines[line1 + pos - 1] <- texts[pos]
            if(length(ptab <- tab[[as.character(line2)]])) {
                col2 <- which(ptab == col2) - 1L
            }
            substring(lines[line2], 1L, col2) <- texts[n]
        }
    }

    blank_out_ignores_in_lines(lines, ignore)
}

get_parse_data_for_message_strings <-
function(file, encoding = "unknown")
{
    ## The message strings considered are the string constants subject to
    ## translation in gettext-family calls (see below for details).

    exprs <-
        suppressWarnings(tools:::.parse_code_file(file = file,
                                                  encoding = encoding,
                                                  keep.source = TRUE))
    if(!length(exprs)) return(NULL)

    pd <- getParseData(exprs)

    ## Function for computing grandparent ids.
    parents <- pd$parent
    names(parents) <- pd$id
    gpids <- function(ids)
        parents[as.character(parents[as.character(ids)])]

    ind <- (pd$token == "SYMBOL_FUNCTION_CALL") &
        !is.na(match(pd$text,
                     c("warning", "stop",
                       "message", "packageStartupMessage",
                       "gettext", "gettextf", "ngettext")))

    funs <- pd$text[ind]

    ids <- gpids(pd$id[ind])
    calls <- getParseText(pd, ids)

    table <- pd[pd$token == "STR_CONST", ]
    ## Could have run into truncation ...
    table$text <- getParseText(table, table$id)
    pos <- match(gpids(table$id), ids)
    ind <- !is.na(pos)
    table <- split(table[ind, ], factor(pos[ind], seq_along(ids)))

    ## We have synopses
    ##   message(..., domain = NULL, appendLF = TRUE)
    ##   packageStartupMessage(..., domain = NULL, appendLF = TRUE)
    ##   warning(..., call. = TRUE, immediate. = FALSE, domain = NULL)
    ##   stop(..., call. = TRUE, domain = NULL)
    ##   gettext(..., domain = NULL)
    ##   ngettext(n, msg1, msg2, domain = NULL)
    ##   gettextf(fmt, ..., domain = NULL)
    ## For the first five, we simply take all unnamed strings.
    ## (Could make this more precise, of course.)
    ## For the latter two, we take the msg1/msg2 and fmt arguments,
    ## provided these are strings.

    ## <NOTE>
    ## Using domain = NA inhibits translation: perhaps it should
    ## optionally also inhibit spell checking?
    ## </NOTE>

    extract_message_strings <- function(fun, call, table) {
        ## Matching a call containing ... gives
        ##   Error in match.call(message, call) :
        ##   ... used in a situation where it doesn't exist
        ## so eliminate these.
        ## (Note that we also drop "..." strings.)
        call <- str2lang(call)
        call <- call[ as.character(call) != "..." ]
        mc <- as.list(match.call(get(fun, envir = .BaseNamespaceEnv),
                                 call))
        args <- if(fun == "gettextf")
            mc["fmt"]
        else if(fun == "ngettext")
            mc[c("msg1", "msg2")]
        else {
            if(!is.null(names(mc)))
                mc <- mc[!nzchar(names(mc))]
            mc[-1L]
        }
        strings <- as.character(args[vapply(args, is.character, TRUE)])
        ## Need to canonicalize to match string constants before and
        ## after parsing ...
        texts <- vapply(str2expression(table$text), as.character, "")
        pos <- which(!is.na(match(texts, strings)))
        cbind(table[pos, ], caller = rep.int(fun, length(pos)))
    }

    do.call(rbind,
            Map(extract_message_strings,
                as.list(funs), as.list(calls), table))
}

## For spell-checking the R R files.

aspell_R_R_files <-
function(which = NULL, dir = NULL,
         ignore = c("[ \t]'[^']*'[ \t[:punct:]]",
                    "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
         program = NULL, dictionaries = aspell_dictionaries_R)
{
    if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
    if(is.null(which))
        which <- tools:::.get_standard_package_names()$base

    files <-
        unlist(lapply(file.path(dir, "src", "library", which, "R"),
                      tools::list_files_with_type,
                      "code",
                      OS_subdirs = c("unix", "windows")),
               use.names = FALSE)

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("R", ignore = ignore),
           control = aspell_control_R_Rd_files[[names(program)]],
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking R files in a package.

aspell_package_R_files <-
function(dir, ignore = character(),
         control = list(), program = NULL, dictionaries = character())
{
    dir <- tools::file_path_as_absolute(dir)

    subdir <- file.path(dir, "R")
    files <- if(dir.exists(subdir))
        tools::list_files_with_type(subdir,
                                    "code",
                                    OS_subdirs = c("unix", "windows"))
    else character()

    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    defaults <- .aspell_package_defaults(dir, encoding)$R_files
    if(!is.null(defaults)) {
        if(!is.null(d <- defaults$ignore))
            ignore <- d
        if(!is.null(d <- defaults$control))
            control <- d
        if(!is.null(d <- defaults$program))
            program <- d
        if(!is.null(d <- defaults$dictionaries)) {
            dictionaries <-
                aspell_find_dictionaries(d, file.path(dir, ".aspell"))
        }
    }

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("R", ignore = ignore),
           control = control,
           encoding = encoding,
           program = program,
           dictionaries = dictionaries)
}

## Spell-checking pot files.

## (Of course, directly analyzing the message strings would be more
## useful, but require writing appropriate text filters.)

## See also tools:::checkPoFile().

aspell_filter_db$pot <-
function (ifile, encoding = "unknown", ignore = character())
{
    lines <- readLines(ifile, encoding = encoding, warn = FALSE)

    ind <- grepl("^msgid[ \t]", lines)

    do_entry <- function(s) {
        out <- character(length(s))
        i <- 1L
        out[i] <- blank_out_regexp_matches(s[i], "^msgid[ \t]+\"")
        while(startsWith(s[i <- i + 1L], '"'))
            out[i] <- sub("^\"", " ", s[i])
        if(grepl("^msgid_plural[ \t]", s[i])) {
            out[i] <- blank_out_regexp_matches(s[i], "^msgid_plural[ \t]+\"")
            while(startsWith(s[i <- i + 1L], '"'))
                out[i] <- sub("^\"", " ", s[i])
        }
        out
    }

    entries <- split(lines, cumsum(ind))
    lines <- c(character(length(entries[[1L]])),
               as.character(do.call(c, lapply(entries[-1L], do_entry))))

    lines <- sub("\"[ \t]*$", " ", lines)

    ## <FIXME>
    ## Could replace backslash escapes for blanks and percent escapes by
    ## blanks, similar to what the R text filter does.
    ## </FIXME>

    blank_out_ignores_in_lines(lines, ignore)
}

## For spell-checking all pot files in a package.

aspell_package_pot_files <-
function(dir, ignore = character(),
         control = list(), program = NULL, dictionaries = character())
{
    dir <- tools::file_path_as_absolute(dir)
    subdir <- file.path(dir, "po")
    files <- if(dir.exists(subdir))
        Sys.glob(file.path(subdir, "*.pot"))
    else character()

    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("pot", ignore = ignore),
           control = control,
           encoding = encoding,
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking the R C files.

aspell_R_C_files <-
function(which = NULL, dir = NULL,
         ignore = c("[ \t]'[[:alnum:]_.]*'[ \t[:punct:]]",
                    "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
         program = NULL, dictionaries = aspell_dictionaries_R)
{
    if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
    if(is.null(which))
        which <- tools:::.get_standard_package_names()$base
    if(!is.na(pos <- match("base", which)))
        which[pos] <- "R"

    files <- sprintf("%s.pot",
                     file.path(dir, "src", "library",
                               which, "po", which))
    files <- files[file_test("-f", files)]

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("pot", ignore = ignore),
           control = aspell_control_R_Rd_files[[names(program)]],
           program = program,
           dictionaries = dictionaries)
}

## For spell-checking package C files.

aspell_package_C_files <-
function(dir, ignore = character(),
         control = list(), program = NULL, dictionaries = character())
{
    dir <- tools::file_path_as_absolute(dir)
    ## Assume that the package C message template file is shipped as
    ## 'po/PACKAGE.pot'.
    files <- file.path(dir, "po",
                       paste(basename(dir), "pot", collapse = "."))
    files <- files[file_test("-f", files)]

    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    defaults <- .aspell_package_defaults(dir, encoding)$C_files
    if(!is.null(defaults)) {
        if(!is.null(d <- defaults$ignore))
            ignore <- d
        if(!is.null(d <- defaults$control))
            control <- d
        if(!is.null(d <- defaults$program))
            program <- d
        if(!is.null(d <- defaults$dictionaries)) {
            dictionaries <-
                aspell_find_dictionaries(d, file.path(dir, ".aspell"))
        }
    }

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("pot", ignore = ignore),
           control = control,
           encoding = encoding,
           program = program,
           dictionaries = dictionaries)
}

## Spell-checking DCF files.

aspell_filter_db$dcf <-
function(ifile, encoding, keep = c("Title", "Description"),
         ignore = character())
{
    lines <- readLines(ifile, encoding = encoding, warn = FALSE)
    line_has_tags <- grepl("^[^[:blank:]][^:]*:", lines)
    tags <- sub(":.*", "", lines[line_has_tags])
    lines[line_has_tags] <-
        blank_out_regexp_matches(lines[line_has_tags], "^[^:]*:")
    lines <- split(lines, cumsum(line_has_tags))
    ind <- is.na(match(tags, keep))
    lines[ind] <- lapply(lines[ind], function(s) rep.int("", length(s)))
    ind <- !ind
    lines[ind] <- lapply(lines[ind], paste0, " ")
    lines <- unlist(lines, use.names = FALSE)
    blank_out_ignores_in_lines(lines, ignore)
}

## For spell-checking package DESCRIPTION files.

aspell_package_description <-
function(dir, ignore = character(),
         control = list(), program = NULL, dictionaries = character())
{
    dir <- tools::file_path_as_absolute(dir)
    files <- file.path(dir, "DESCRIPTION")

    meta <- tools:::.get_package_metadata(dir, installed = FALSE)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- "unknown"

    ## Allow providing package defaults but make this controllable via
    ##   _R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_
    ## to safeguard against possible mis-use for CRAN incoming checks.
    defaults <-
        Sys.getenv("_R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_",
                   "TRUE")
    defaults <- if(tools:::config_val_to_logical(defaults)) {
                    .aspell_package_defaults(dir, encoding)$description
                } else NULL
    if(!is.null(defaults)) {
        if(!is.null(d <- defaults$ignore))
            ignore <- d
        if(!is.null(d <- defaults$control))
            control <- d
        if(!is.null(d <- defaults$program))
            program <- d
        if(!is.null(d <- defaults$dictionaries)) {
            dictionaries <-
                aspell_find_dictionaries(d, file.path(dir, ".aspell"))
        }
    }

    program <- aspell_find_program(program)

    aspell(files,
           filter = list("dcf", ignore = ignore),
           control = control,
           encoding = encoding,
           program = program,
           dictionaries = dictionaries)
}

## Spell-checking Markdown files.

aspell_filter_db$md <-
function(ifile, encoding = "UTF-8")
{
    x <- readLines(ifile, encoding = encoding, warn = FALSE)
    n <- nchar(x)
    y <- strrep(rep.int(" ", length(x)), n)
    ## Determine positions of 'texts' along the lines of
    ## spelling::parse_text_md () by Jeroen Ooms.
    md <- commonmark::markdown_xml(x, extensions = TRUE,
                                   sourcepos = TRUE)
    doc <- xml2::xml_ns_strip(xml2::read_xml(md))
    pos <- strsplit(xml2::xml_attr(xml2::xml_find_all(doc,
                                                      "//text[@sourcepos]"),
                                   "sourcepos"),
                    "[:-]")
    ## Now use the following idea.
    ## Each elt of pos now has positions for l1:c1 to l2:c2.
    ## If l1 < l2
    ##   Lines in (l1, l2) are taken as a whole
    ##   Line l1 from c1 to nchar for l1
    ##   Line l2 from  1 to c1
    ## otherwise
    ##   Line l1 from c1 to c2.
    for(p in pos) {
        p <- as.integer(p)
        ## Legibility ...
        l1 <- p[1L]; c1 <- p[2L]; l2 <- p[3L]; c2 <- p[4L]
        if(l1 < l2) {
            substring(y[l1], c1, n[l1]) <- substring(x[l1], c1, n[l1])
            if(l1 + 1L < l2) {
                w <- seq.int(from = l1 + 1L, to = l2 - 1L)
                y[w] <- x[w]
            }
            substring(y[l2], 1L, c2) <- substring(x[l2], 1L, c2)
        } else {
            substring(y[l1], c1, c2) <- substring(x[l1], c1, c2)
        }
    }
    y
}

## For spell checking packages.

aspell_package <-
function(dir,
         control = list(), program = NULL, dictionaries = character())
{
    args <- list(dir = dir,
                 program = program,
                 control = control,
                 dictionaries = dictionaries)
    a <- rbind(do.call(aspell_package_description, args),
               do.call(aspell_package_Rd_files, args),
               do.call(aspell_package_vignettes, args),
               do.call(aspell_package_R_files, args),
               do.call(aspell_package_C_files, args))
    if(nrow(a)) {
        a$File <- tools:::.file_path_relative_to_dir(a$File,
                                                     dirname(dir))
    }
    a
}

## For writing personal dictionaries:

aspell_write_personal_dictionary_file <-
function(x, out, language = "en", program = NULL)
{
    if(inherits(x, "aspell"))
        x <- sort(unique(x$Original))

    program <- aspell_find_program(program)
    if(is.na(program))
        stop("No suitable spell check program found.")

    ## <NOTE>
    ## Ispell and Hunspell take simple word lists as personal dictionary
    ## files, but Aspell requires a special format, see e.g.
    ## http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
    ## and one has to create these by hand, as
    ##   aspell --lang=en create personal ./foo "a b c"
    ## gives: Sorry "create/merge personal" is currently unimplemented.

    ## Encodings are a nightmare.
    ## Try to canonicalize to UTF-8 for Aspell (which allows recording
    ## the encoding in the personal dictionary).
    ## <FIXME>
    ## What should we do for Hunspell (which can handle UTF-8, but has
    ## no encoding information in the personal dictionary), or Ispell
    ## (which cannot handle UTF-8)?
    ## </FIXME>

    if(names(program) == "aspell") {
        header <- sprintf("personal_ws-1.1 %s %d UTF-8",
                          language, length(x))
        x <- enc2utf8(x)
    }
    else {
        header <- NULL
    }

    writeLines(c(header, x), out, useBytes = TRUE)
}

## For reading package defaults:

.aspell_package_defaults <-
function(dir, encoding = "unknown")
{
    dfile <- file.path(dir, ".aspell", "defaults.R")
    if(!file_test("-f", dfile))
        return(NULL)
    exprs <- parse(dfile, encoding = encoding)
    envir <- new.env()
    for(e in exprs) eval(e, envir)
    as.list(envir)
}

## Utilities.

blank_out_regexp_matches <-
function(s, re, ...)
{
    m <- gregexpr(re, s, ...)
    regmatches(s, m) <-
        Map(function(n) strrep(" ", n),
            lapply(regmatches(s, m), nchar))
    s
}

blank_out_ignores_in_lines <-
function(lines, ignore)
{
    args <- list()
    if(is.list(ignore)) {
        args <- ignore[-1L]
        ignore <- ignore[[1L]]
    }
    for(re in ignore[nzchar(ignore)])
        lines <- do.call(blank_out_regexp_matches,
                         c(list(lines, re), args))
    lines
}

find_files_in_directories <-
function(basenames, dirnames)
{
    dirnames <- dirnames[dir.exists(dirnames)]
    dirnames <- normalizePath(dirnames, "/")

    out <- character(length(basenames))
    pos <- seq_along(out)

    for(dir in dirnames) {
        paths <- file.path(dir, basenames[pos])
        ind <- file_test("-f", paths)
        out[pos[ind]] <- paths[ind]
        pos <- pos[!ind]
        if(!length(pos)) break
    }

    out
}