1# File src/library/utils/R/aspell.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2019 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19aspell <- 20function(files, filter, control = list(), encoding = "unknown", 21 program = NULL, dictionaries = character()) 22{ 23 ## Take the given files and feed them through spell checker in 24 ## Ispell pipe mode. 25 26 ## Think about options and more command line options eventually. 27 28 program <- aspell_find_program(program) 29 if(is.na(program)) 30 stop("No suitable spell-checker program found") 31 32 ## Be nice. 33 if(inherits(files, "Rd")) 34 files <- list(files) 35 36 files_are_names <- is.character(files) 37 38 filter_args <- list() 39 if(missing(filter) || is.null(filter)) { 40 filter <- if(!files_are_names) { 41 function(ifile, encoding) { 42 if(inherits(ifile, "srcfile")) 43 readLines(ifile$filename, encoding = encoding, 44 warn = FALSE) 45 else if(inherits(ifile, "connection")) 46 readLines(ifile, encoding = encoding, warn = FALSE) 47 else { 48 ## What should this do with encodings? 49 as.character(ifile) 50 } 51 } 52 } 53 else NULL 54 } 55 else if(is.character(filter)) { 56 ## Look up filter in aspell filter db. 57 filter_name <- filter[1L] 58 filter <- aspell_filter_db[[filter_name]] 59 ## Warn if the filter was not found in the db. 60 if(is.null(filter)) 61 warning(gettextf("Filter '%s' is not available.", 62 filter_name), 63 domain = NA) 64 } 65 else if(is.list(filter)) { 66 ## Support 67 ## list("Rd", drop = "\\references" 68 ## at least for now. 69 filter_name <- filter[[1L]][1L] 70 filter_args <- filter[-1L] 71 filter <- aspell_filter_db[[filter_name]] 72 ## Warn if the filter was not found in the db. 73 if(is.null(filter)) 74 warning(gettextf("Filter '%s' is not available.", 75 filter_name), 76 domain = NA) 77 } 78 else if(!is.function(filter)) 79 stop("Invalid 'filter' argument.") 80 81 encoding <- rep_len(encoding, length(files)) 82 83 verbose <- getOption("verbose") 84 85 db <- data.frame(Original = character(), File = character(), 86 Line = integer(), Column = integer(), 87 stringsAsFactors = FALSE) 88 db$Suggestions <- list() 89 90 tfile <- tempfile("aspell") 91 on.exit(unlink(tfile)) 92 93 if(length(dictionaries)) { 94 paths <- aspell_find_dictionaries(dictionaries) 95 ind <- paths == "" 96 if(any(ind)) { 97 warning(gettextf("The following dictionaries were not found:\n%s", 98 paste(sprintf(" %s", dictionaries[ind]), 99 collapse = "\n")), 100 domain = NA) 101 paths <- paths[!ind] 102 } 103 if(length(paths)) { 104 words <- unlist(lapply(paths, readRDS), use.names = FALSE) 105 personal <- tempfile("aspell_personal") 106 on.exit(unlink(personal), add = TRUE) 107 ## <FIXME> 108 ## How can we get the right language set (if needed)? 109 ## Maybe aspell() needs an additional 'language' arg? 110 aspell_write_personal_dictionary_file(words, personal, 111 program = program) 112 ## </FIXME> 113 control <- c(control, "-p", shQuote(personal)) 114 } 115 } 116 117 ## No special expansion of control argument for now. 118 control <- as.character(control) 119 120 fnames <- names(files) 121 files <- as.list(files) 122 123 for (i in seq_along(files)) { 124 125 file <- files[[i]] 126 if(files_are_names) 127 fname <- file 128 else { 129 ## Try srcfiles and srcrefs ... 130 fname <- if(inherits(file, "srcfile")) 131 file$filename 132 else 133 attr(attr(file, "srcref"), "srcfile")$filename 134 ## As a last resort, try the names of the files argument. 135 if(is.null(fname)) 136 fname <- fnames[i] 137 ## If unknown ... 138 if(is.null(fname)) 139 fname <- "<unknown>" 140 } 141 142 enc <- encoding[i] 143 144 if(verbose) 145 message(gettextf("Processing file %s", fname), 146 domain = NA) 147 148 lines <- if(is.null(filter)) 149 readLines(file, encoding = enc, warn = FALSE) 150 else { 151 ## Assume that filter takes an input file (and additional 152 ## arguments) and return a character vector. 153 do.call(filter, c(list(file, encoding = enc), filter_args)) 154 } 155 156 ## Allow filters to pass additional control arguments, in case 157 ## these need to be inferred from the file contents. 158 control <- c(control, attr(lines, "control")) 159 160 ## Need to escape all lines with carets to ensure Aspell handles 161 ## them as data: the Aspell docs say 162 ## It is recommended that programmatic interfaces prefix every 163 ## data line with an uparrow to protect themselves against 164 ## future changes in Aspell. 165 writeLines(paste0("^", lines), tfile) 166 ## Note that this re-encodes character strings with marked 167 ## encodings to the current encoding (which is definitely fine 168 ## if this is UTF-8 and Aspell was compiled with full UTF-8 169 ## support). Alternatively, we could try using something along 170 ## the lines of 171 ## writeLines(paste0("^", lines), tfile, 172 ## useBytes = TRUE) 173 ## and pass the encoding info to Aspell in case we know it. 174 175 out <- tools:::.system_with_capture(program, c("-a", control), 176 stdin = tfile) 177 178 if(out$status != 0L) 179 stop(gettextf("Running aspell failed with diagnostics:\n%s", 180 paste(out$stderr, collapse = "\n")), 181 domain = NA) 182 183 ## Hopefully everything worked ok. 184 lines <- out$stdout[-1L] 185 pos <- cumsum(lines == "") + 1L 186 187 ## Format is as follows. 188 ## First line is a header. 189 ## Blank lines separate the results for each line. 190 ## Results for the word on each line are given as follows. 191 ## * If the word was found in the main dictionary, or your personal 192 ## dictionary, then the line contains only a `*'. 193 ## * If the word is not in the dictionary, but there are 194 ## suggestions, then the line contains an `&', a space, the 195 ## misspelled word, a space, the number of near misses, the number 196 ## of characters between the beginning of the line and the 197 ## beginning of the misspelled word, a colon, another space, and a 198 ## list of the suggestions separated by commas and spaces. 199 ## * If the word does not appear in the dictionary, and there are no 200 ## suggestions, then the line contains a `#', a space, the 201 ## misspelled word, a space, and the character offset from the 202 ## beginning of the line. 203 ## This can be summarized as follows: 204 ## OK: * 205 ## Suggestions: & original count offset: miss, miss, ... 206 ## None: # original offset 207 208 ## Look at words not in dictionary with suggestions. 209 210 if(any(ind <- startsWith(lines, "&"))) { 211 info <- strsplit(lines[ind], ": ", fixed = TRUE) 212 one <- strsplit(sapply(info, `[`, 1L), " ", fixed = TRUE) 213 two <- strsplit(sapply(info, `[`, 2L), ", ", fixed = TRUE) 214 db1 <- data.frame(Original = 215 as.character(sapply(one, `[`, 2L)), 216 File = fname, 217 Line = pos[ind], 218 Column = 219 as.integer(sapply(one, `[`, 4L)), 220 stringsAsFactors = FALSE) 221 db1$Suggestions <- two 222 db <- rbind(db, db1) 223 } 224 ## Looks at words not in dictionary with no suggestions. 225 if(any(ind <- startsWith(lines, "#"))) { 226 one <- strsplit(lines[ind], " ", fixed = TRUE) 227 db1 <- data.frame(Original = 228 as.character(sapply(one, `[`, 2L)), 229 File = fname, 230 Line = pos[ind], 231 Column = 232 as.integer(sapply(one, `[`, 3L)), 233 stringsAsFactors = FALSE) 234 db1$Suggestions <- vector("list", length(one)) 235 db <- rbind(db, db1) 236 } 237 } 238 239 class(db) <- c("aspell", "data.frame") 240 db 241} 242 243format.aspell <- 244function(x, sort = TRUE, verbose = FALSE, indent = 2L, ...) 245{ 246 if(!nrow(x)) return(character()) 247 248 if(sort) 249 x <- x[order(x$Original, x$File, x$Line, x$Column), ] 250 251 from <- split(sprintf("%s:%d:%d", x$File, x$Line, x$Column), 252 x$Original) 253 254 if(verbose) { 255 unlist(Map(function(w, f, s) { 256 sprintf("Word: %s\nFrom: %s\n%s", 257 w, 258 paste0(c("", rep.int(" ", length(f) - 1L)), 259 f, collapse = "\n"), 260 paste(strwrap(paste("Suggestions:", 261 paste(s[[1L]], collapse = " ")), 262 exdent = 6L, indent = 0L), 263 collapse = "\n")) 264 }, 265 names(from), 266 from, 267 split(x$Suggestions, x$Original))) 268 } else { 269 sep <- sprintf("\n%s", strrep(" ", indent)) 270 paste(names(from), 271 vapply(from, paste, "", collapse = sep), 272 sep = sep) 273 } 274} 275 276print.aspell <- 277function(x, ...) 278{ 279 if(nrow(x)) 280 writeLines(paste(format(x, ...), collapse = "\n\n")) 281 invisible(x) 282} 283 284summary.aspell <- 285function(object, ...) 286{ 287 words <- sort(unique(object$Original)) 288 if(length(words)) { 289 writeLines("Possibly mis-spelled words:") 290 print(words) 291 } 292 invisible(words) 293} 294 295aspell_filter_db <- new.env(hash = FALSE) # small 296aspell_filter_db$Rd <- tools::RdTextFilter 297aspell_filter_db$Sweave <- tools::SweaveTeXFilter 298 299aspell_find_program <- 300function(program = NULL) 301{ 302 check <- !is.null(program) || !is.null(names(program)) 303 if(is.null(program)) 304 program <- getOption("aspell_program") 305 if(is.null(program)) 306 program <- c("aspell", "hunspell", "ispell") 307 program <- Filter(nzchar, Sys.which(program))[1L] 308 if(!is.na(program) && check) { 309 out <- c(system(sprintf("%s -v", program), 310 intern = TRUE), "")[1L] 311 if(grepl("really Aspell", out)) 312 names(program) <- "aspell" 313 else if(grepl("really Hunspell", out)) 314 names(program) <- "hunspell" 315 else if(grepl("International Ispell", out)) 316 names(program) <- "ispell" 317 else 318 names(program) <- NA_character_ 319 } 320 program 321} 322 323aspell_dictionaries_R <- "en_stats" 324 325aspell_find_dictionaries <- 326function(dictionaries, dirnames = character()) 327{ 328 dictionaries <- as.character(dictionaries) 329 if(!(n <- length(dictionaries))) return(character()) 330 331 ## Always search the R system dictionary directory first. 332 dirnames <- c(file.path(R.home("share"), "dictionaries"), dirnames) 333 334 ## For now, all dictionary files should be .rds files. 335 if(any(ind <- !endsWith(dictionaries, ".rds"))) 336 dictionaries[ind] <- sprintf("%s.rds", dictionaries[ind]) 337 338 out <- character(n) 339 ## Dictionaries with no path separators are looked for in the given 340 ## dictionary directories (by default, the R system dictionary 341 ## directory). 342 ind <- grepl(.Platform$file.sep, dictionaries, fixed = TRUE) 343 ## (Equivalently, could check where paths == basename(paths).) 344 if(length(pos <- which(ind))) { 345 pos <- pos[file_test("-f", dictionaries[pos])] 346 out[pos] <- normalizePath(dictionaries[pos], "/") 347 } 348 if(length(pos <- which(!ind))) { 349 out[pos] <- find_files_in_directories(dictionaries[pos], 350 dirnames) 351 } 352 out 353} 354 355### Utilities. 356 357aspell_inspect_context <- 358function(x) 359{ 360 x <- split(x, x$File) 361 y <- Map(function(f, x) { 362 lines <- readLines(f, warn = FALSE)[x$Line] 363 cbind(f, 364 x$Line, 365 substring(lines, 1L, x$Column - 1L), 366 x$Original, 367 substring(lines, x$Column + nchar(x$Original))) 368 }, 369 names(x), x) 370 y <- data.frame(do.call(rbind, y), stringsAsFactors = FALSE) 371 names(y) <- c("File", "Line", "Left", "Original", "Right") 372 class(y) <- c("aspell_inspect_context", "data.frame") 373 y 374} 375 376print.aspell_inspect_context <- 377function(x, ...) 378{ 379 s <- split(x, x$File) 380 nms <- names(s) 381 for(i in seq_along(s)) { 382 e <- s[[i]] 383 writeLines(c(sprintf("File '%s':", nms[i]), 384 sprintf(" Line %s: \"%s\", \"%s\", \"%s\"", 385 format(e$Line), 386 gsub("\"", "\\\"", e$Left ), e$Original, 387 gsub("\"", "\\\"", e$Right)), 388 "")) 389 } 390 invisible(x) 391} 392 393 394## For spell-checking the R manuals: 395 396## This can really only be done with Aspell as the other checkers have 397## no texinfo mode. 398 399aspell_control_R_manuals <- 400 list(aspell = 401 c("--master=en_US", 402 "--add-extra-dicts=en_GB", 403 "--mode=texinfo", 404 "--add-texinfo-ignore=acronym", 405 "--add-texinfo-ignore=deftypefun", 406 "--add-texinfo-ignore=deftypefunx", 407 "--add-texinfo-ignore=findex", 408 "--add-texinfo-ignore=enindex", 409 "--add-texinfo-ignore=include", 410 "--add-texinfo-ignore=ifclear", 411 "--add-texinfo-ignore=ifset", 412 "--add-texinfo-ignore=math", 413 "--add-texinfo-ignore=macro", 414 "--add-texinfo-ignore=multitable", 415 "--add-texinfo-ignore=node", 416 "--add-texinfo-ignore=pkg", 417 "--add-texinfo-ignore=printindex", 418 "--add-texinfo-ignore=set", 419 "--add-texinfo-ignore=vindex", 420 "--add-texinfo-ignore-env=menu", 421 "--add-texinfo-ignore=CRANpkg" 422 ), 423 hunspell = 424 c("-d en_US,en_GB")) 425 426aspell_R_manuals <- 427function(which = NULL, dir = NULL, program = NULL, 428 dictionaries = aspell_dictionaries_R) 429{ 430 if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd() 431 ## Allow specifying 'R-exts' and alikes, or full paths. 432 files <- if(is.null(which)) { 433 Sys.glob(file.path(dir, "doc", "manual", "*.texi")) 434 } else { 435 ind <- which(which == 436 basename(tools::file_path_sans_ext(which))) 437 which[ind] <- 438 file.path(dir, "doc", "manual", 439 sprintf("%s.texi", which[ind])) 440 which 441 } 442 443 program <- aspell_find_program(program) 444 445 aspell(files, 446 control = aspell_control_R_manuals[[names(program)]], 447 program = program, 448 dictionaries = dictionaries) 449} 450 451## For spell-checking the R Rd files: 452 453aspell_control_R_Rd_files <- 454 list(aspell = 455 c("--master=en_US", 456 "--add-extra-dicts=en_GB"), 457 hunspell = 458 c("-d en_US,en_GB")) 459 460aspell_R_Rd_files <- 461function(which = NULL, dir = NULL, drop = "\\references", 462 program = NULL, dictionaries = aspell_dictionaries_R) 463{ 464 files <- character() 465 466 if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd() 467 468 if(is.null(which)) { 469 which <- tools:::.get_standard_package_names()$base 470 # CHANGES.Rd could be dropped from checks in the future; 471 # it will not be updated post 2.15.0 472 files <- c(file.path(dir, "doc", "NEWS.Rd"), 473 file.path(dir, "src", "gnuwin32", "CHANGES.Rd")) 474 files <- files[file_test("-f", files)] 475 } 476 477 files <- 478 c(files, 479 unlist(lapply(file.path(dir, "src", "library", which, "man"), 480 tools::list_files_with_type, 481 "docs", OS_subdirs = c("unix", "windows")), 482 use.names = FALSE)) 483 484 program <- aspell_find_program(program) 485 486 aspell(files, 487 filter = list("Rd", drop = drop), 488 control = aspell_control_R_Rd_files[[names(program)]], 489 program = program, 490 dictionaries = dictionaries) 491} 492 493## For spell-checking Rd files in a package: 494 495aspell_package_Rd_files <- 496function(dir, drop = c("\\author", "\\references"), 497 control = list(), program = NULL, dictionaries = character()) 498{ 499 dir <- normalizePath(dir, "/") 500 501 subdir <- file.path(dir, "man") 502 files <- if(dir.exists(subdir)) 503 tools::list_files_with_type(subdir, 504 "docs", 505 OS_subdirs = c("unix", "windows")) 506 else character() 507 508 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 509 if(is.na(encoding <- meta["Encoding"])) 510 encoding <- "unknown" 511 512 defaults <- .aspell_package_defaults(dir, encoding)$Rd_files 513 if(!is.null(defaults)) { 514 ## Direct settings currently override (could add a list add = 515 ## TRUE mechanism eventually). 516 if(!is.null(d <- defaults$drop)) 517 drop <- d 518 if(!is.null(d <- defaults$control)) 519 control <- d 520 if(!is.null(d <- defaults$program)) 521 program <- d 522 if(!is.null(d <- defaults$dictionaries)) { 523 dictionaries <- 524 aspell_find_dictionaries(d, file.path(dir, ".aspell")) 525 } 526 ## <FIXME> 527 ## Deprecated in favor of specifying R level dictionaries. 528 ## Maybe give a warning (in particular if both are given)? 529 if(!is.null(d <- defaults$personal)) 530 control <- c(control, 531 sprintf("-p %s", 532 shQuote(file.path(dir, ".aspell", d)))) 533 ## </FIXME> 534 } 535 536 macros <- tools::loadPkgRdMacros(dir, 537 macros = file.path(R.home("share"), 538 "Rd", "macros", 539 "system.Rd")) 540 541 aspell(files, 542 filter = list("Rd", drop = drop, macros = macros), 543 control = control, 544 encoding = encoding, 545 program = program, 546 dictionaries = dictionaries) 547} 548 549## For spell-checking the R vignettes: 550 551## This should really be done with Aspell as the other checkers have far 552## less powerful TeX modes. 553 554aspell_control_R_vignettes <- 555 list(aspell = 556 c("--mode=tex", 557 "--master=en_US", 558 "--add-extra-dicts=en_GB", 559 "--add-tex-command='code p'", 560 "--add-tex-command='pkg p'", 561 "--add-tex-command='CRANpkg p'" 562 ), 563 hunspell = 564 c("-t", "-d en_US,en_GB")) 565 566aspell_R_vignettes <- 567function(program = NULL, dictionaries = aspell_dictionaries_R) 568{ 569 files <- Sys.glob(file.path(tools:::.R_top_srcdir_from_Rd(), 570 "src", "library", "*", "vignettes", 571 "*.Rnw")) 572 573 program <- aspell_find_program(program) 574 575 aspell(files, 576 filter = "Sweave", 577 control = aspell_control_R_vignettes[[names(program)]], 578 program = program, 579 dictionaries = dictionaries) 580} 581 582## For spell-checking vignettes in a package: 583 584## This should really be done with Aspell as the other checkers have far 585## less powerful TeX modes. 586 587aspell_control_package_vignettes <- 588 list(aspell = 589 c("--add-tex-command='citep oop'", 590 "--add-tex-command='Sexpr p'", 591 "--add-tex-command='code p'", 592 "--add-tex-command='pkg p'", 593 "--add-tex-command='proglang p'", 594 "--add-tex-command='samp p'" 595 )) 596 597aspell_package_vignettes <- 598function(dir, 599 control = list(), program = NULL, dictionaries = character()) 600{ 601 dir <- tools::file_path_as_absolute(dir) 602 603 vinfo <- tools::pkgVignettes(dir = dir) 604 files <- vinfo$docs 605 if(!length(files)) return(aspell(character())) 606 607 ## We need the package encoding to read the defaults file ... 608 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 609 if(is.na(encoding <- meta["Encoding"])) 610 encoding <- "unknown" 611 612 defaults <- .aspell_package_defaults(dir, encoding)$vignettes 613 if(!is.null(defaults)) { 614 if(!is.null(d <- defaults$control)) 615 control <- d 616 if(!is.null(d <- defaults$program)) 617 program <- d 618 if(!is.null(d <- defaults$dictionaries)) { 619 dictionaries <- 620 aspell_find_dictionaries(d, file.path(dir, ".aspell")) 621 } 622 ## <FIXME> 623 ## Deprecated in favor of specifying R level dictionaries. 624 ## Maybe give a warning (in particular if both are given)? 625 if(!is.null(d <- defaults$personal)) 626 control <- c(control, 627 sprintf("-p %s", 628 shQuote(file.path(dir, ".aspell", d)))) 629 ## </FIXME> 630 } 631 632 program <- aspell_find_program(program) 633 634 fgroups <- split(files, vinfo$engines) 635 egroups <- split(vinfo$encodings, vinfo$engines) 636 637 do.call(rbind, 638 Map(function(fgroup, egroup, engine) { 639 engine <- tools::vignetteEngine(engine) 640 aspell(fgroup, 641 filter = engine$aspell$filter, 642 control = 643 c(engine$aspell$control, 644 aspell_control_package_vignettes[[names(program)]], 645 control), 646 encoding = egroup, 647 program = program, 648 dictionaries = dictionaries) 649 }, 650 fgroups, 651 egroups, 652 names(fgroups) 653 ) 654 ) 655} 656 657## Spell-checking R files. 658 659aspell_filter_db$R <- 660function(ifile, encoding = "unknown", ignore = character()) 661{ 662 pd <- get_parse_data_for_message_strings(ifile, encoding) 663 if(is.null(pd) || !NROW(pd)) return(character()) 664 665 ## Strip the string delimiters. 666 pd$text <- substring(pd$text, 2L, nchar(pd$text) - 1L) 667 ## Replace whitespace C backslash escape sequences by whitespace. 668 pd$text <- gsub("(^|[^\\])\\\\[fnrt]", "\\1 ", pd$text) 669 pd$text <- gsub( "([^\\])\\\\[fnrt]", "\\1 ", pd$text) 670 ## (Do this twice for now because in e.g. 671 ## \n\t\tInformation on package %s 672 ## the first \t is not matched the first time. Alternatively, we 673 ## could match with 674 ## (^|[^\\])((\\\\[fnrt])+) 675 ## but then computing the replacement (\\1 plus as many blanks as 676 ## the characters in \\2) is not straightforward. 677 ## For gettextf() calls, replace basic percent escape sequences by 678 ## whitespace. 679 ind <- pd$caller == "gettextf" 680 if(any(ind)) { 681 pd$text[ind] <- 682 gsub("(^|[^%])%[dioxXfeEgGaAs]", "\\1 ", pd$text[ind]) 683 pd$text[ind] <- 684 gsub(" ([^%])%[dioxXfeEgGaAs]", "\\1 ", pd$text[ind]) 685 ## (See above for doing this twice.) 686 } 687 688 lines <- readLines(ifile, encoding = encoding, warn = FALSE) 689 690 ## Column positions in the parse data have tabs expanded to tab 691 ## stops using a tab width of 8, so for lines with tabs we need to 692 ## map the column positions back to character positions. 693 lines_in_pd <- sort(unique(c(pd$line1, pd$line2))) 694 tab <- Map(function(tp, nc) { 695 if(tp[1L] == -1L) return(NULL) 696 widths <- rep.int(1, nc) 697 for(i in tp) { 698 cols <- cumsum(widths) 699 widths[i] <- 8 - (cols[i] - 1) %% 8 700 } 701 cumsum(widths) 702 }, 703 gregexpr("\t", lines[lines_in_pd], fixed = TRUE), 704 nchar(lines[lines_in_pd])) 705 names(tab) <- lines_in_pd 706 707 lines[lines_in_pd] <- gsub("[^\t]", " ", lines[lines_in_pd]) 708 lines[-lines_in_pd] <- "" 709 710 for(entry in split(pd, seq_len(NROW(pd)))) { 711 line1 <- entry$line1 712 line2 <- entry$line2 713 col1 <- entry$col1 714 col2 <- entry$col2 715 if(line1 == line2) { 716 if(length(ptab <- tab[[as.character(line1)]])) { 717 col1 <- which(ptab == col1) + 1L 718 col2 <- which(ptab == col2) - 1L 719 } 720 substring(lines[line1], col1, col2) <- entry$text 721 } else { 722 texts <- unlist(strsplit(entry$text, "\n", fixed = TRUE)) 723 n <- length(texts) 724 if(length(ptab <- tab[[as.character(line1)]])) { 725 col1 <- which(ptab == col1) + 1L 726 } 727 substring(lines[line1], col1) <- texts[1L] 728 pos <- seq.int(from = 2L, length.out = n - 2L) 729 if(length(pos)) 730 lines[line1 + pos - 1] <- texts[pos] 731 if(length(ptab <- tab[[as.character(line2)]])) { 732 col2 <- which(ptab == col2) - 1L 733 } 734 substring(lines[line2], 1L, col2) <- texts[n] 735 } 736 } 737 738 blank_out_ignores_in_lines(lines, ignore) 739} 740 741get_parse_data_for_message_strings <- 742function(file, encoding = "unknown") 743{ 744 ## The message strings considered are the string constants subject to 745 ## translation in gettext-family calls (see below for details). 746 747 exprs <- 748 suppressWarnings(tools:::.parse_code_file(file = file, 749 encoding = encoding, 750 keep.source = TRUE)) 751 if(!length(exprs)) return(NULL) 752 753 pd <- getParseData(exprs) 754 755 ## Function for computing grandparent ids. 756 parents <- pd$parent 757 names(parents) <- pd$id 758 gpids <- function(ids) 759 parents[as.character(parents[as.character(ids)])] 760 761 ind <- (pd$token == "SYMBOL_FUNCTION_CALL") & 762 !is.na(match(pd$text, 763 c("warning", "stop", 764 "message", "packageStartupMessage", 765 "gettext", "gettextf", "ngettext"))) 766 767 funs <- pd$text[ind] 768 769 ids <- gpids(pd$id[ind]) 770 calls <- getParseText(pd, ids) 771 772 table <- pd[pd$token == "STR_CONST", ] 773 ## Could have run into truncation ... 774 table$text <- getParseText(table, table$id) 775 pos <- match(gpids(table$id), ids) 776 ind <- !is.na(pos) 777 table <- split(table[ind, ], factor(pos[ind], seq_along(ids))) 778 779 ## We have synopses 780 ## message(..., domain = NULL, appendLF = TRUE) 781 ## packageStartupMessage(..., domain = NULL, appendLF = TRUE) 782 ## warning(..., call. = TRUE, immediate. = FALSE, domain = NULL) 783 ## stop(..., call. = TRUE, domain = NULL) 784 ## gettext(..., domain = NULL) 785 ## ngettext(n, msg1, msg2, domain = NULL) 786 ## gettextf(fmt, ..., domain = NULL) 787 ## For the first five, we simply take all unnamed strings. 788 ## (Could make this more precise, of course.) 789 ## For the latter two, we take the msg1/msg2 and fmt arguments, 790 ## provided these are strings. 791 792 ## <NOTE> 793 ## Using domain = NA inhibits translation: perhaps it should 794 ## optionally also inhibit spell checking? 795 ## </NOTE> 796 797 extract_message_strings <- function(fun, call, table) { 798 ## Matching a call containing ... gives 799 ## Error in match.call(message, call) : 800 ## ... used in a situation where it doesn't exist 801 ## so eliminate these. 802 ## (Note that we also drop "..." strings.) 803 call <- str2lang(call) 804 call <- call[ as.character(call) != "..." ] 805 mc <- as.list(match.call(get(fun, envir = .BaseNamespaceEnv), 806 call)) 807 args <- if(fun == "gettextf") 808 mc["fmt"] 809 else if(fun == "ngettext") 810 mc[c("msg1", "msg2")] 811 else { 812 if(!is.null(names(mc))) 813 mc <- mc[!nzchar(names(mc))] 814 mc[-1L] 815 } 816 strings <- as.character(args[vapply(args, is.character, TRUE)]) 817 ## Need to canonicalize to match string constants before and 818 ## after parsing ... 819 texts <- vapply(str2expression(table$text), as.character, "") 820 pos <- which(!is.na(match(texts, strings))) 821 cbind(table[pos, ], caller = rep.int(fun, length(pos))) 822 } 823 824 do.call(rbind, 825 Map(extract_message_strings, 826 as.list(funs), as.list(calls), table)) 827} 828 829## For spell-checking the R R files. 830 831aspell_R_R_files <- 832function(which = NULL, dir = NULL, 833 ignore = c("[ \t]'[^']*'[ \t[:punct:]]", 834 "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"), 835 program = NULL, dictionaries = aspell_dictionaries_R) 836{ 837 if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd() 838 if(is.null(which)) 839 which <- tools:::.get_standard_package_names()$base 840 841 files <- 842 unlist(lapply(file.path(dir, "src", "library", which, "R"), 843 tools::list_files_with_type, 844 "code", 845 OS_subdirs = c("unix", "windows")), 846 use.names = FALSE) 847 848 program <- aspell_find_program(program) 849 850 aspell(files, 851 filter = list("R", ignore = ignore), 852 control = aspell_control_R_Rd_files[[names(program)]], 853 program = program, 854 dictionaries = dictionaries) 855} 856 857## For spell-checking R files in a package. 858 859aspell_package_R_files <- 860function(dir, ignore = character(), 861 control = list(), program = NULL, dictionaries = character()) 862{ 863 dir <- tools::file_path_as_absolute(dir) 864 865 subdir <- file.path(dir, "R") 866 files <- if(dir.exists(subdir)) 867 tools::list_files_with_type(subdir, 868 "code", 869 OS_subdirs = c("unix", "windows")) 870 else character() 871 872 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 873 if(is.na(encoding <- meta["Encoding"])) 874 encoding <- "unknown" 875 876 defaults <- .aspell_package_defaults(dir, encoding)$R_files 877 if(!is.null(defaults)) { 878 if(!is.null(d <- defaults$ignore)) 879 ignore <- d 880 if(!is.null(d <- defaults$control)) 881 control <- d 882 if(!is.null(d <- defaults$program)) 883 program <- d 884 if(!is.null(d <- defaults$dictionaries)) { 885 dictionaries <- 886 aspell_find_dictionaries(d, file.path(dir, ".aspell")) 887 } 888 } 889 890 program <- aspell_find_program(program) 891 892 aspell(files, 893 filter = list("R", ignore = ignore), 894 control = control, 895 encoding = encoding, 896 program = program, 897 dictionaries = dictionaries) 898} 899 900## Spell-checking pot files. 901 902## (Of course, directly analyzing the message strings would be more 903## useful, but require writing appropriate text filters.) 904 905## See also tools:::checkPoFile(). 906 907aspell_filter_db$pot <- 908function (ifile, encoding = "unknown", ignore = character()) 909{ 910 lines <- readLines(ifile, encoding = encoding, warn = FALSE) 911 912 ind <- grepl("^msgid[ \t]", lines) 913 914 do_entry <- function(s) { 915 out <- character(length(s)) 916 i <- 1L 917 out[i] <- blank_out_regexp_matches(s[i], "^msgid[ \t]+\"") 918 while(startsWith(s[i <- i + 1L], '"')) 919 out[i] <- sub("^\"", " ", s[i]) 920 if(grepl("^msgid_plural[ \t]", s[i])) { 921 out[i] <- blank_out_regexp_matches(s[i], "^msgid_plural[ \t]+\"") 922 while(startsWith(s[i <- i + 1L], '"')) 923 out[i] <- sub("^\"", " ", s[i]) 924 } 925 out 926 } 927 928 entries <- split(lines, cumsum(ind)) 929 lines <- c(character(length(entries[[1L]])), 930 as.character(do.call(c, lapply(entries[-1L], do_entry)))) 931 932 lines <- sub("\"[ \t]*$", " ", lines) 933 934 ## <FIXME> 935 ## Could replace backslash escapes for blanks and percent escapes by 936 ## blanks, similar to what the R text filter does. 937 ## </FIXME> 938 939 blank_out_ignores_in_lines(lines, ignore) 940} 941 942## For spell-checking all pot files in a package. 943 944aspell_package_pot_files <- 945function(dir, ignore = character(), 946 control = list(), program = NULL, dictionaries = character()) 947{ 948 dir <- tools::file_path_as_absolute(dir) 949 subdir <- file.path(dir, "po") 950 files <- if(dir.exists(subdir)) 951 Sys.glob(file.path(subdir, "*.pot")) 952 else character() 953 954 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 955 if(is.na(encoding <- meta["Encoding"])) 956 encoding <- "unknown" 957 958 program <- aspell_find_program(program) 959 960 aspell(files, 961 filter = list("pot", ignore = ignore), 962 control = control, 963 encoding = encoding, 964 program = program, 965 dictionaries = dictionaries) 966} 967 968## For spell-checking the R C files. 969 970aspell_R_C_files <- 971function(which = NULL, dir = NULL, 972 ignore = c("[ \t]'[[:alnum:]_.]*'[ \t[:punct:]]", 973 "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"), 974 program = NULL, dictionaries = aspell_dictionaries_R) 975{ 976 if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd() 977 if(is.null(which)) 978 which <- tools:::.get_standard_package_names()$base 979 if(!is.na(pos <- match("base", which))) 980 which[pos] <- "R" 981 982 files <- sprintf("%s.pot", 983 file.path(dir, "src", "library", 984 which, "po", which)) 985 files <- files[file_test("-f", files)] 986 987 program <- aspell_find_program(program) 988 989 aspell(files, 990 filter = list("pot", ignore = ignore), 991 control = aspell_control_R_Rd_files[[names(program)]], 992 program = program, 993 dictionaries = dictionaries) 994} 995 996## For spell-checking package C files. 997 998aspell_package_C_files <- 999function(dir, ignore = character(), 1000 control = list(), program = NULL, dictionaries = character()) 1001{ 1002 dir <- tools::file_path_as_absolute(dir) 1003 ## Assume that the package C message template file is shipped as 1004 ## 'po/PACKAGE.pot'. 1005 files <- file.path(dir, "po", 1006 paste(basename(dir), "pot", collapse = ".")) 1007 files <- files[file_test("-f", files)] 1008 1009 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 1010 if(is.na(encoding <- meta["Encoding"])) 1011 encoding <- "unknown" 1012 1013 defaults <- .aspell_package_defaults(dir, encoding)$C_files 1014 if(!is.null(defaults)) { 1015 if(!is.null(d <- defaults$ignore)) 1016 ignore <- d 1017 if(!is.null(d <- defaults$control)) 1018 control <- d 1019 if(!is.null(d <- defaults$program)) 1020 program <- d 1021 if(!is.null(d <- defaults$dictionaries)) { 1022 dictionaries <- 1023 aspell_find_dictionaries(d, file.path(dir, ".aspell")) 1024 } 1025 } 1026 1027 program <- aspell_find_program(program) 1028 1029 aspell(files, 1030 filter = list("pot", ignore = ignore), 1031 control = control, 1032 encoding = encoding, 1033 program = program, 1034 dictionaries = dictionaries) 1035} 1036 1037## Spell-checking DCF files. 1038 1039aspell_filter_db$dcf <- 1040function(ifile, encoding, keep = c("Title", "Description"), 1041 ignore = character()) 1042{ 1043 lines <- readLines(ifile, encoding = encoding, warn = FALSE) 1044 line_has_tags <- grepl("^[^[:blank:]][^:]*:", lines) 1045 tags <- sub(":.*", "", lines[line_has_tags]) 1046 lines[line_has_tags] <- 1047 blank_out_regexp_matches(lines[line_has_tags], "^[^:]*:") 1048 lines <- split(lines, cumsum(line_has_tags)) 1049 ind <- is.na(match(tags, keep)) 1050 lines[ind] <- lapply(lines[ind], function(s) rep.int("", length(s))) 1051 ind <- !ind 1052 lines[ind] <- lapply(lines[ind], paste0, " ") 1053 lines <- unlist(lines, use.names = FALSE) 1054 blank_out_ignores_in_lines(lines, ignore) 1055} 1056 1057## For spell-checking package DESCRIPTION files. 1058 1059aspell_package_description <- 1060function(dir, ignore = character(), 1061 control = list(), program = NULL, dictionaries = character()) 1062{ 1063 dir <- tools::file_path_as_absolute(dir) 1064 files <- file.path(dir, "DESCRIPTION") 1065 1066 meta <- tools:::.get_package_metadata(dir, installed = FALSE) 1067 if(is.na(encoding <- meta["Encoding"])) 1068 encoding <- "unknown" 1069 1070 ## Allow providing package defaults but make this controllable via 1071 ## _R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_ 1072 ## to safeguard against possible mis-use for CRAN incoming checks. 1073 defaults <- 1074 Sys.getenv("_R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_", 1075 "TRUE") 1076 defaults <- if(tools:::config_val_to_logical(defaults)) { 1077 .aspell_package_defaults(dir, encoding)$description 1078 } else NULL 1079 if(!is.null(defaults)) { 1080 if(!is.null(d <- defaults$ignore)) 1081 ignore <- d 1082 if(!is.null(d <- defaults$control)) 1083 control <- d 1084 if(!is.null(d <- defaults$program)) 1085 program <- d 1086 if(!is.null(d <- defaults$dictionaries)) { 1087 dictionaries <- 1088 aspell_find_dictionaries(d, file.path(dir, ".aspell")) 1089 } 1090 } 1091 1092 program <- aspell_find_program(program) 1093 1094 aspell(files, 1095 filter = list("dcf", ignore = ignore), 1096 control = control, 1097 encoding = encoding, 1098 program = program, 1099 dictionaries = dictionaries) 1100} 1101 1102## Spell-checking Markdown files. 1103 1104aspell_filter_db$md <- 1105function(ifile, encoding = "UTF-8") 1106{ 1107 x <- readLines(ifile, encoding = encoding, warn = FALSE) 1108 n <- nchar(x) 1109 y <- strrep(rep.int(" ", length(x)), n) 1110 ## Determine positions of 'texts' along the lines of 1111 ## spelling::parse_text_md () by Jeroen Ooms. 1112 md <- commonmark::markdown_xml(x, extensions = TRUE, 1113 sourcepos = TRUE) 1114 doc <- xml2::xml_ns_strip(xml2::read_xml(md)) 1115 pos <- strsplit(xml2::xml_attr(xml2::xml_find_all(doc, 1116 "//text[@sourcepos]"), 1117 "sourcepos"), 1118 "[:-]") 1119 ## Now use the following idea. 1120 ## Each elt of pos now has positions for l1:c1 to l2:c2. 1121 ## If l1 < l2 1122 ## Lines in (l1, l2) are taken as a whole 1123 ## Line l1 from c1 to nchar for l1 1124 ## Line l2 from 1 to c1 1125 ## otherwise 1126 ## Line l1 from c1 to c2. 1127 for(p in pos) { 1128 p <- as.integer(p) 1129 ## Legibility ... 1130 l1 <- p[1L]; c1 <- p[2L]; l2 <- p[3L]; c2 <- p[4L] 1131 if(l1 < l2) { 1132 substring(y[l1], c1, n[l1]) <- substring(x[l1], c1, n[l1]) 1133 if(l1 + 1L < l2) { 1134 w <- seq.int(from = l1 + 1L, to = l2 - 1L) 1135 y[w] <- x[w] 1136 } 1137 substring(y[l2], 1L, c2) <- substring(x[l2], 1L, c2) 1138 } else { 1139 substring(y[l1], c1, c2) <- substring(x[l1], c1, c2) 1140 } 1141 } 1142 y 1143} 1144 1145## For spell checking packages. 1146 1147aspell_package <- 1148function(dir, 1149 control = list(), program = NULL, dictionaries = character()) 1150{ 1151 args <- list(dir = dir, 1152 program = program, 1153 control = control, 1154 dictionaries = dictionaries) 1155 a <- rbind(do.call(aspell_package_description, args), 1156 do.call(aspell_package_Rd_files, args), 1157 do.call(aspell_package_vignettes, args), 1158 do.call(aspell_package_R_files, args), 1159 do.call(aspell_package_C_files, args)) 1160 if(nrow(a)) { 1161 a$File <- tools:::.file_path_relative_to_dir(a$File, 1162 dirname(dir)) 1163 } 1164 a 1165} 1166 1167## For writing personal dictionaries: 1168 1169aspell_write_personal_dictionary_file <- 1170function(x, out, language = "en", program = NULL) 1171{ 1172 if(inherits(x, "aspell")) 1173 x <- sort(unique(x$Original)) 1174 1175 program <- aspell_find_program(program) 1176 if(is.na(program)) 1177 stop("No suitable spell check program found.") 1178 1179 ## <NOTE> 1180 ## Ispell and Hunspell take simple word lists as personal dictionary 1181 ## files, but Aspell requires a special format, see e.g. 1182 ## http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html 1183 ## and one has to create these by hand, as 1184 ## aspell --lang=en create personal ./foo "a b c" 1185 ## gives: Sorry "create/merge personal" is currently unimplemented. 1186 1187 ## Encodings are a nightmare. 1188 ## Try to canonicalize to UTF-8 for Aspell (which allows recording 1189 ## the encoding in the personal dictionary). 1190 ## <FIXME> 1191 ## What should we do for Hunspell (which can handle UTF-8, but has 1192 ## no encoding information in the personal dictionary), or Ispell 1193 ## (which cannot handle UTF-8)? 1194 ## </FIXME> 1195 1196 if(names(program) == "aspell") { 1197 header <- sprintf("personal_ws-1.1 %s %d UTF-8", 1198 language, length(x)) 1199 x <- enc2utf8(x) 1200 } 1201 else { 1202 header <- NULL 1203 } 1204 1205 writeLines(c(header, x), out, useBytes = TRUE) 1206} 1207 1208## For reading package defaults: 1209 1210.aspell_package_defaults <- 1211function(dir, encoding = "unknown") 1212{ 1213 dfile <- file.path(dir, ".aspell", "defaults.R") 1214 if(!file_test("-f", dfile)) 1215 return(NULL) 1216 exprs <- parse(dfile, encoding = encoding) 1217 envir <- new.env() 1218 for(e in exprs) eval(e, envir) 1219 as.list(envir) 1220} 1221 1222## Utilities. 1223 1224blank_out_regexp_matches <- 1225function(s, re, ...) 1226{ 1227 m <- gregexpr(re, s, ...) 1228 regmatches(s, m) <- 1229 Map(function(n) strrep(" ", n), 1230 lapply(regmatches(s, m), nchar)) 1231 s 1232} 1233 1234blank_out_ignores_in_lines <- 1235function(lines, ignore) 1236{ 1237 args <- list() 1238 if(is.list(ignore)) { 1239 args <- ignore[-1L] 1240 ignore <- ignore[[1L]] 1241 } 1242 for(re in ignore[nzchar(ignore)]) 1243 lines <- do.call(blank_out_regexp_matches, 1244 c(list(lines, re), args)) 1245 lines 1246} 1247 1248find_files_in_directories <- 1249function(basenames, dirnames) 1250{ 1251 dirnames <- dirnames[dir.exists(dirnames)] 1252 dirnames <- normalizePath(dirnames, "/") 1253 1254 out <- character(length(basenames)) 1255 pos <- seq_along(out) 1256 1257 for(dir in dirnames) { 1258 paths <- file.path(dir, basenames[pos]) 1259 ind <- file_test("-f", paths) 1260 out[pos[ind]] <- paths[ind] 1261 pos <- pos[!ind] 1262 if(!length(pos)) break 1263 } 1264 1265 out 1266} 1267