#' Convert a document with pandoc
#'
#' Convert documents to and from various formats using the pandoc utility.
#'
#' @param input Character vector containing paths to input files
#'   (files must be UTF-8 encoded)
#' @param to Format to convert to (if not specified, you must specify
#'   \code{output})
#' @param from Format to convert from (if not specified then the format is
#'   determined based on the file extension of \code{input}).
#' @param output Output file (if not specified then determined based on format
#'   being converted to)
#' @param citeproc \code{TRUE} to run the pandoc-citeproc filter (for processing
#'   citations) as part of the conversion. Because \code{--natbib} and
#'   \code{--biblatex} conflict with that filter, they are dropped from
#'   \code{options} when \code{citeproc} is \code{TRUE}.
#' @param options Character vector of command line options to pass to pandoc.
#' @param verbose \code{TRUE} to show the pandoc command line which was executed
#' @param wd Working directory in which code will be executed. If not
#'   supplied, defaults to the common base directory of \code{input}
#'
#' @return \code{NULL}, invisibly. An error is signalled if pandoc exits with
#'   a non-zero status.
#'
#' @details Supported input and output formats are described in the
#'   \href{http://johnmacfarlane.net/pandoc/README.html}{pandoc user guide}.
#'
#'   The system path as well as the version of pandoc shipped with RStudio (if
#'   running under RStudio) are scanned for pandoc and the highest version
#'   available is used.
#'
#' @examples
#' \dontrun{
#' library(rmarkdown)
#'
#' # convert markdown to various formats
#' pandoc_convert("input.md", to = "html")
#' pandoc_convert("input.md", to = "pdf")
#'
#' # process citations
#' pandoc_convert("input.md", to = "html", citeproc = TRUE)
#'
#' # add some pandoc options
#' pandoc_convert("input.md", to = "pdf", options = c("--listings"))
#' }
#'
#' @export
pandoc_convert <- function(input,
                           to = NULL,
                           from = NULL,
                           output = NULL,
                           citeproc = FALSE,
                           options = NULL,
                           verbose = FALSE,
                           wd = NULL) {

  # ensure we've scanned for pandoc
  find_pandoc()

  # execute in specified working directory (restored on exit)
  if (is.null(wd)) {
    wd <- base_dir(input)
  }
  oldwd <- setwd(wd)
  on.exit(setwd(oldwd), add = TRUE)

  # --natbib/--biblatex conflict with '--filter pandoc-citeproc'; they must be
  # removed *before* the options are appended to the command line, otherwise
  # the removal has no effect on what pandoc receives
  if (citeproc) {
    options <- options[!options %in% c("--natbib", "--biblatex")]
  }

  # pandoc stack size (GHC runtime option, must come first)
  stack_size <- getOption("pandoc.stack.size", default = "512m")

  # assemble the arguments; NULL pieces are dropped by c()
  args <- c(
    "+RTS", paste0("-K", stack_size), "-RTS",
    input,
    if (!is.null(to)) c("--to", to),
    if (!is.null(from)) c("--from", from),
    if (!is.null(output)) c("--output", output),
    options,
    if (citeproc) c("--filter", pandoc_citeproc())
  )

  # build the conversion command
  command <- paste(quoted(pandoc()), paste(quoted(args), collapse = " "))

  # show it in verbose mode
  if (verbose)
    cat(command, "\n")

  # run the conversion
  with_pandoc_safe_environment({
    result <- system(command)
  })
  if (result != 0)
    stop("pandoc document conversion failed with error ", result, call. = FALSE)

  invisible(NULL)
}

#' Check pandoc availability and version
#'
#' Determine whether pandoc is currently available on the system (optionally
#' checking for a specific version or greater). Determine the specific version
#' of pandoc available.
#'
#' @param version Required version of pandoc
#' @param error Whether to signal an error if pandoc with the required version
#'   is not found
#'
#' @return \code{pandoc_available} returns a logical indicating whether the
#'   required version of pandoc is available. \code{pandoc_version} returns a
#'   \code{\link[base]{numeric_version}} with the version of pandoc found.
#'
#' @details
#'
#' The system environment variable \samp{PATH} as well as the version of pandoc
#' shipped with RStudio (its location is set via the environment variable
#' \samp{RSTUDIO_PANDOC} by RStudio products like the RStudio IDE, RStudio
#' Server, Shiny Server, and RStudio Connect, etc) are scanned for pandoc and
#' the highest version available is used. Please do not modify the environment
#' variable \samp{RSTUDIO_PANDOC} unless you know what it means.
#'
#' @examples
#' \dontrun{
#' library(rmarkdown)
#'
#' if (pandoc_available())
#'   cat("pandoc", as.character(pandoc_version()), "is available!\n")
#'
#' if (pandoc_available("1.12.3"))
#'   cat("required version of pandoc is available!\n")
#' }
#' @export
pandoc_available <- function(version = NULL, error = FALSE) {

  # ensure we've scanned for pandoc
  find_pandoc()

  # available when a directory is cached and (if requested) it is new enough
  found <- !is.null(.pandoc$dir) &&
    (is.null(version) || .pandoc$version >= version)

  # only build the message when it is actually going to be raised
  if (error && !found) {
    msg <- c(
      "pandoc", if (!is.null(version)) c("version", version, "or higher"),
      "is required and was not found (see the help page ?rmarkdown::pandoc_available)."
    )
    stop(paste(msg, collapse = " "), call. = FALSE)
  }

  found
}


#' @rdname pandoc_available
#' @export
pandoc_version <- function() {
  find_pandoc()
  .pandoc$version
}

#' Functions for generating pandoc command line arguments
#'
#' Functions that assist in creating various types of pandoc command line
#' arguments (e.g. for templates, table of contents, highlighting, and content
#' includes)
#'
#' @inheritParams includes
#'
#' @param name Name of template variable to set.
#' @param value Value of template variable (defaults to \code{true} if missing).
#' @param toc \code{TRUE} to include a table of contents in the output.
#' @param toc_depth Depth of headers to include in table of contents.
#' @param highlight The name of a pandoc syntax highlighting theme.
#' @param latex_engine LaTeX engine for producing PDF output. Options are
#'   "pdflatex", "lualatex", and "xelatex".
#' @param default The highlighting theme to use if "default"
#'   is specified.
#'
#' @return A character vector with pandoc command line arguments
#'
#' @details Non-absolute paths for resources referenced from the
#'   \code{in_header}, \code{before_body}, and \code{after_body}
#'   parameters are resolved relative to the directory of the input document.
#'
#' @examples
#' \dontrun{
#'
#' library(rmarkdown)
#'
#' pandoc_include_args(before_body = "header.htm")
#' pandoc_include_args(before_body = "header.tex")
#'
#' pandoc_highlight_args("kate")
#'
#' pandoc_latex_engine_args("pdflatex")
#'
#' pandoc_toc_args(toc = TRUE, toc_depth = 2)
#'
#' }
#' @name pandoc_args
NULL

#' @rdname pandoc_args
#' @export
pandoc_variable_arg <- function(name, value) {
  # "--variable name" when no value is given, "--variable name=value" otherwise
  c("--variable", if (missing(value)) name else paste0(name, "=", value))
}


#' @rdname pandoc_args
#' @export
pandoc_include_args <- function(in_header = NULL,
                                before_body = NULL,
                                after_body = NULL) {

  # one "<flag> <path>" pair per file; NULL when there are no files
  include_args <- function(flag, files) {
    unlist(
      lapply(files, function(file) c(flag, pandoc_path_arg(file))),
      use.names = FALSE
    )
  }

  c(
    include_args("--include-in-header", in_header),
    include_args("--include-before-body", before_body),
    include_args("--include-after-body", after_body)
  )
}

#' @rdname pandoc_args
#' @export
pandoc_highlight_args <- function(highlight, default = "tango") {

  # NULL disables highlighting altogether
  if (is.null(highlight))
    return("--no-highlight")

  # "default" is an alias for the caller-supplied default theme
  if (identical(highlight, "default"))
    highlight <- default

  c("--highlight-style", highlight)
}

#' @rdname pandoc_args
#' @export
pandoc_latex_engine_args <- function(latex_engine) {
  # the option name depends on whether pandoc 2.0 or later is available
  c(if (pandoc2.0()) "--pdf-engine" else "--latex-engine",
    find_latex_engine(latex_engine))
}

# For macOS, use a full path to the latex engine since the stripping
# of the PATH environment variable by OSX 10.10 Yosemite prevents
# pandoc from finding the engine in e.g. /usr/texbin
find_latex_engine <- function(latex_engine) {
  # do not need full path if latex_engine is available from PATH
  if (!is_osx() || nzchar(Sys.which(latex_engine))) return(latex_engine)
  # resolve path if it's not already an absolute path
  if (!grepl("/", latex_engine) && nzchar(path <- find_program(latex_engine)))
    latex_engine <- path
  latex_engine
}

#' @rdname pandoc_args
#' @export
pandoc_toc_args <- function(toc, toc_depth = 3) {

  # no arguments at all unless a table of contents was requested
  if (!toc)
    return(NULL)

  c("--table-of-contents", "--toc-depth", toc_depth)
}


#' Transform path for passing to pandoc
#'
#' Transform a path for passing to pandoc on the command line. Calls
#' \code{\link[base:path.expand]{path.expand}} on all platforms. On Windows,
#' transform it to a short path name if it contains spaces, and then convert
#' forward slashes to back slashes (as required by pandoc for some path
#' references)
#'
#' @param path Path to transform
#' @param backslash Whether to replace forward slashes in \code{path} with
#'   backslashes on Windows
#'
#' @return Transformed path that can be passed to pandoc on the command line
#'
#' @export
pandoc_path_arg <- function(path, backslash = TRUE) {

  path <- path.expand(path)

  # remove redundant ./ prefix if present
  path <- sub('^[.]/', '', path)

  if (is_windows()) {
    # only paths containing spaces need the short (8.3) form
    i <- grep(' ', path)
    if (length(i))
      path[i] <- utils::shortPathName(path[i])
    if (backslash) path <- gsub('/', '\\\\', path)
  }

  path
}


#' Render a pandoc template.
#'
#' Use the pandoc templating engine to render a text file. Substitutions are
#' done using the \code{metadata} list passed to the function.
#'
#' @param metadata A named list containing metadata to pass to template.
#' @param template Path to a pandoc template.
#' @param output Path to save output.
#' @param verbose \code{TRUE} to show the pandoc command line which was
#'   executed.
#' @return (Invisibly) The path of the generated file.
#'
#' @export
pandoc_template <- function(metadata, template, output, verbose = FALSE) {

  # temporary markdown document holding nothing but the YAML metadata
  tmp <- tempfile(fileext = ".md")
  on.exit(unlink(tmp), add = TRUE)

  # YAML front matter delimited by "---" lines, followed by an empty body
  cat("---\n", yaml::as.yaml(metadata), "---\n", "\n", file = tmp, sep = "")

  # let pandoc substitute the metadata into the template
  pandoc_convert(tmp, to = "markdown", output = output,
                 options = paste0("--template=", template),
                 verbose = verbose)

  invisible(output)
}

#' Create a self-contained HTML document using pandoc.
#'
#' Create a self-contained HTML document by base64 encoding images,
#' scripts, and stylesheets referenced by the input document.
#'
#' @param input Input html file to create self-contained version of.
#' @param output Path to save output.
#'
#' @return (Invisibly) The path of the generated file.
#'
#' @export
pandoc_self_contained_html <- function(input, output) {

  # make input file path absolute
  input <- normalizePath(input)

  # ensure output file exists and make its path absolute
  if (!file.exists(output))
    file.create(output)
  output <- normalizePath(output)

  # create a simple body-only template
  template <- tempfile(fileext = ".html")
  on.exit(unlink(template), add = TRUE)
  writeLines("$body$", template)

  # convert from markdown to html to get base64 encoding
  # (note there is no markdown in the source document but
  # we still need to do this "conversion" to get the
  # base64 encoding)

  # determine from (there are bugs in pandoc < 1.17 that
  # cause markdown_strict to hang on very large script
  # elements)
  from <- if (pandoc_available("1.17")) "markdown_strict" else "markdown"

  # do the conversion
  pandoc_convert(
    input = input,
    from = from,
    output = output,
    options = c("--self-contained", "--template", template)
  )

  invisible(output)
}



# Signal an error when local MathJax is requested; called for its side effect.
validate_self_contained <- function(mathjax) {
  if (identical(mathjax, "local"))
    stop("Local MathJax isn't compatible with self_contained\n",
         "(you should set self_contained to FALSE)", call. = FALSE)
}

# Build the pandoc arguments that enable MathJax. `mathjax` may be NULL (no
# arguments), "default", "local", or any other value, which is passed through
# to pandoc as-is.
pandoc_mathjax_args <- function(mathjax,
                                template,
                                self_contained,
                                files_dir,
                                output_dir) {

  # MathJax disabled: nothing to add
  if (is.null(mathjax))
    return(NULL)

  args <- c()

  if (identical(mathjax, "default")) {
    # only the "default" template gets the default MathJax URL
    mathjax <- if (identical(template, "default")) default_mathjax() else NULL
  } else if (identical(mathjax, "local")) {
    # copy the local MathJax into the supporting files and reference it
    # relative to the output directory
    mathjax_path <- pandoc_mathjax_local_path()
    mathjax_path <- render_supporting_files(mathjax_path,
                                            files_dir,
                                            "mathjax-local")
    mathjax <- paste0(normalized_relative_to(output_dir, mathjax_path), "/",
                      mathjax_config())
  }

  if (identical(template, "default")) {
    # the default template receives the URL through a template variable
    args <- c(args, "--mathjax")
    args <- c(args, "--variable", paste0("mathjax-url:", mathjax))
  } else if (!self_contained) {
    args <- c(args, "--mathjax")
    if (!is.null(mathjax))
      args <- c(args, mathjax)
  } else {
    warning("MathJax doesn't work with self_contained when not ",
            "using the rmarkdown \"default\" template.", call. = FALSE)
  }

  args
}


# Locate a local MathJax installation: the RMARKDOWN_MATHJAX_PATH environment
# variable wins, then the location probed by unix_mathjax_path(); otherwise an
# error is raised.
pandoc_mathjax_local_path <- function() {

  local_path <- Sys.getenv("RMARKDOWN_MATHJAX_PATH", unset = NA)
  if (!is.na(local_path))
    return(local_path)

  local_path <- unix_mathjax_path()
  if (is.na(local_path)) {
    stop("For mathjax = \"local\", please set the RMARKDOWN_MATHJAX_PATH ",
         "environment variable to the location of MathJax. ",
         "On Linux systems you can also install MathJax using your ",
         "system package manager.")
  }

  local_path
}


# Path of the system-wide MathJax on unix-alikes, or NA when this is not a
# unix platform or MathJax.js is not present at that location.
unix_mathjax_path <- function() {
  if (!identical(.Platform$OS.type, "unix"))
    return(NA)

  mathjax_path <- "/usr/share/javascript/mathjax"
  if (file.exists(file.path(mathjax_path, "MathJax.js")))
    mathjax_path
  else
    NA
}


# Build the pandoc highlighting arguments for HTML output.
pandoc_html_highlight_args <- function(template, highlight) {

  args <- c()

  if (is.null(highlight)) {
    args <- c(args, "--no-highlight")
  }
  else if (!identical(template, "default")) {
    # custom templates always use pandoc's own highlighting
    if (identical(highlight, "default"))
      highlight <- "pygments"
    args <- c(args, "--highlight-style", highlight)
  }
  else {
    highlight <- match.arg(highlight, html_highlighters())
    if (is_highlightjs(highlight)) {
      # highlight.js themes: turn pandoc highlighting off and set the
      # template variable instead
      args <- c(args, "--no-highlight")
      args <- c(args, "--variable", "highlightjs=1")
    }
    else {
      args <- c(args, "--highlight-style", highlight)
    }
  }

  args
}

# TRUE when `highlight` is "default" or "textmate".
is_highlightjs <- function(highlight) {
  !is.null(highlight) && (highlight %in% c("default", "textmate"))
}

# Scan for a copy of pandoc and set the internal cache if it's found.
find_pandoc <- function() {

  # only scan when no pandoc directory has been cached yet
  if (is.null(.pandoc$dir)) {

    # define potential sources
    sys_pandoc <- find_program("pandoc")
    sources <- c(Sys.getenv("RSTUDIO_PANDOC"),
                 if (nzchar(sys_pandoc)) dirname(sys_pandoc) else "")
    if (!is_windows())
      sources <- c(sources, path.expand("~/opt/pandoc"))

    # determine the versions of the sources ("0" marks an unusable source)
    versions <- lapply(sources, function(src) {
      if (dir_exists(src))
        get_pandoc_version(src)
      else
        numeric_version("0")
    })

    # find the maximum version
    found_src <- NULL
    found_ver <- numeric_version("0")
    for (i in seq_along(sources)) {
      ver <- versions[[i]]
      if (ver > found_ver) {
        found_ver <- ver
        found_src <- sources[[i]]
      }
    }

    # did we find a version?
    if (!is.null(found_src)) {
      .pandoc$dir <- found_src
      .pandoc$version <- found_ver
    }
  }
}

# Get an S3 numeric_version for the pandoc utility at the specified path
# (numeric_version("0") when there is no executable pandoc in that directory)
get_pandoc_version <- function(pandoc_dir) {
  pandoc_path <- file.path(pandoc_dir, "pandoc")
  if (is_windows()) pandoc_path <- paste0(pandoc_path, ".exe")
  if (!utils::file_test("-x", pandoc_path)) return(numeric_version("0"))
  with_pandoc_safe_environment({
    version_info <- system(paste(shQuote(pandoc_path), "--version"),
                           intern = TRUE)
  })
  # the version is the second space-separated token of the first output line
  version <- strsplit(version_info, "\n")[[1]][1]
  version <- strsplit(version, " ")[[1]][2]
  numeric_version(version)
}

# wrap a system call to pandoc so that LC_ALL is not set
# see: https://github.com/rstudio/rmarkdown/issues/31
# see: https://ghc.haskell.org/trac/ghc/ticket/7344
#
# Every environment variable changed here is restored when the function exits.
with_pandoc_safe_environment <- function(code) {
  lc_all <- Sys.getenv("LC_ALL", unset = NA)
  if (!is.na(lc_all)) {
    Sys.unsetenv("LC_ALL")
    on.exit(Sys.setenv(LC_ALL = lc_all), add = TRUE)
  }
  lc_ctype <- Sys.getenv("LC_CTYPE", unset = NA)
  if (!is.na(lc_ctype)) {
    Sys.unsetenv("LC_CTYPE")
    on.exit(Sys.setenv(LC_CTYPE = lc_ctype), add = TRUE)
  }

  # the remaining adjustments only apply on Linux
  is_linux <- identical(Sys.info()[["sysname"]], "Linux")

  if (is_linux && is.na(Sys.getenv("HOME", unset = NA))) {
    stop("The 'HOME' environment variable must be set before running Pandoc.")
  }
  if (is_linux && is.na(Sys.getenv("LANG", unset = NA))) {
    # fill in the LANG environment variable if it doesn't exist
    Sys.setenv(LANG = detect_generic_lang())
    on.exit(Sys.unsetenv("LANG"), add = TRUE)
  }
  if (is_linux && identical(Sys.getenv("LANG"), "en_US")) {
    # temporarily switch a bare "en_US" to its UTF-8 variant
    Sys.setenv(LANG = "en_US.UTF-8")
    on.exit(Sys.setenv(LANG = "en_US"), add = TRUE)
  }
  force(code)
}

# if there is no LANG environment variable set pandoc is going to hang so
# we need to specify a "generic" lang setting. With glibc >= 2.13 you can
# specify C.UTF-8 so we prefer that. If we can't find that then we fall back
# to en_US.UTF-8.
detect_generic_lang <- function() {

  locale_util <- Sys.which("locale")

  if (nzchar(locale_util)) {
    # system(intern = TRUE) already yields one element per output line
    locales <- system(paste(locale_util, "-a"), intern = TRUE)
    if ("C.UTF-8" %in% locales)
      return("C.UTF-8")
  }

  # default to en_US.UTF-8
  "en_US.UTF-8"
}



# get the path to the pandoc binary
pandoc <- function() {
  find_pandoc()
  file.path(.pandoc$dir, "pandoc")
}

# get the path to the pandoc-citeproc binary (falls back to the bare program
# name when it is not found next to pandoc)
pandoc_citeproc <- function() {
  find_pandoc()
  citeproc_path <- file.path(.pandoc$dir, "pandoc-citeproc")
  if (file.exists(citeproc_path))
    citeproc_path
  else
    "pandoc-citeproc"
}

# quote args if they need it
quoted <- function(args) {
  # some characters are legal in filenames but without quoting are likely to be
  # interpreted by the shell (e.g. redirection, wildcard expansion, etc.) --
  # wrap arguments containing these characters in quotes.
  shell_chars <- grepl(.shell_chars_regex, args)
  args[shell_chars] <- shQuote(args[shell_chars])
  args
}

# Return the value of the first "--variable theme:<value>" pair in `args`,
# or NULL when there is none.
find_pandoc_theme_variable <- function(args) {
  # seq_len() (unlike 1:n) performs no iterations when `args` has fewer than
  # two elements, so short or empty argument vectors simply yield NULL
  last_flag <- max(length(args) - 1L, 0L)
  for (i in seq_len(last_flag)) {
    if (args[[i]] == "--variable" && grepl("^theme:", args[[i + 1]])) {
      return(substring(args[[i + 1]], nchar("theme:") + 1))
    }
  }
  # none found, return NULL
  NULL
}


# Environment used to cache the current pandoc directory and version
.pandoc <- new.env()
.pandoc$dir <- NULL
.pandoc$version <- NULL

# TRUE when pandoc 2.0 or later is available
pandoc2.0 <- function() pandoc_available("2.0")