1#' Write a data frame to a delimited file 2#' 3#' The `write_*()` family of functions are an improvement to analogous function such 4#' as [write.csv()] because they are approximately twice as fast. Unlike [write.csv()], 5#' these functions do not include row names as a column in the written file. 6#' A generic function, `output_column()`, is applied to each variable 7#' to coerce columns to suitable output. 8#' 9#' @section Output: 10#' Factors are coerced to character. Doubles are formatted to a decimal string 11#' using the grisu3 algorithm. `POSIXct` values are formatted as ISO8601 with a 12#' UTC timezone *Note: `POSIXct` objects in local or non-UTC timezones will be 13#' converted to UTC time before writing.* 14#' 15#' All columns are encoded as UTF-8. `write_excel_csv()` and `write_excel_csv2()` also include a 16#' \href{https://en.wikipedia.org/wiki/Byte_order_mark}{UTF-8 Byte order mark} 17#' which indicates to Excel the csv is UTF-8 encoded. 18#' 19#' `write_excel_csv2()` and `write_csv2` were created to allow users with 20#' different locale settings to save .csv files using their default settings 21#' (e.g. `;` as the column separator and `,` as the decimal separator). 22#' This is common in some European countries. 23#' 24#' Values are only quoted if they contain a comma, quote or newline. 25#' 26#' The `write_*()` functions will automatically compress outputs if an appropriate extension is given. 27#' Three extensions are currently supported: `.gz` for gzip compression, `.bz2` for bzip2 compression and 28#' `.xz` for lzma compression. See the examples for more information. 29#' 30#' @param x A data frame or tibble to write to disk. 31#' @param file File or connection to write to. 32#' @param append If `FALSE`, will overwrite existing file. If `TRUE`, 33#' will append to existing file. In both cases, if the file does not exist a new 34#' file is created. 35#' @param col_names If `FALSE`, column names will not be included at the top of the file. If `TRUE`, 36#' column names will be included. If not specified, `col_names` will take the opposite value given to `append`. 37#' @param delim Delimiter used to separate values. Defaults to `" "` for `write_delim()`, `","` for `write_excel_csv()` and 38#' `";"` for `write_excel_csv2()`. Must be a single character. 39#' @param na String used for missing values. Defaults to NA. Missing values 40#' will never be quoted; strings with the same value as `na` will 41#' always be quoted. 42#' @param quote_escape \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}, use the `escape` argument instead. 43#' @param eol The end of line character to use. Most commonly either `"\n"` for 44#' Unix style newlines, or `"\r\n"` for Windows style newlines. 45#' @param path \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}, use the `file` argument instead. 46#' @return `write_*()` returns the input `x` invisibly. 47#' @inheritParams vroom::vroom_write 48#' @inheritParams read_delim 49#' @references Florian Loitsch, Printing Floating-Point Numbers Quickly and 50#' Accurately with Integers, PLDI '10, 51#' <http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf> 52#' @export 53#' @examples 54#' \dontshow{ 55#' .old_wd <- setwd(tempdir()) 56#' } 57#' # If only a file name is specified, write_()* will write 58#' # the file to the current working directory. 59#' write_csv(mtcars, "mtcars.csv") 60#' write_tsv(mtcars, "mtcars.tsv") 61#' 62#' # If you add an extension to the file name, write_()* will 63#' # automatically compress the output. 64#' write_tsv(mtcars, "mtcars.tsv.gz") 65#' write_tsv(mtcars, "mtcars.tsv.bz2") 66#' write_tsv(mtcars, "mtcars.tsv.xz") 67#' \dontshow{ 68#' setwd(.old_wd) 69#' } 70write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE, 71 col_names = !append, 72 quote = c("needed", "all", "none"), 73 escape = c("double", "backslash", "none"), 74 eol = "\n", 75 num_threads = readr_threads(), 76 progress = show_progress(), 77 path = deprecated(), 78 quote_escape = deprecated()) { 79 if (is_present(path)) { 80 deprecate_warn("1.4.0", "write_delim(path = )", "write_delim(file = )") 81 file <- path 82 } 83 84 if (is_present(quote_escape)) { 85 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 86 escape <- quote_escape 87 } 88 89 stopifnot(is.data.frame(x)) 90 check_column_types(x) 91 92 x_out <- x 93 x[] <- lapply(x, output_column) 94 if (edition_first()) { 95 stream_delim(x, file, 96 delim = delim, col_names = col_names, append = append, 97 na = na, quote_escape = escape, eol = eol 98 ) 99 return(invisible(x_out)) 100 } 101 vroom::vroom_write(x, file, 102 delim = delim, col_names = col_names, append = append, 103 na = na, eol = eol, quote = quote, escape = escape, num_threads = num_threads, 104 progress = progress 105 ) 106 107 invisible(x_out) 108} 109 110#' @rdname write_delim 111#' @export 112write_csv <- function(x, file, na = "NA", append = FALSE, col_names = !append, 113 quote = c("needed", "all", "none"), 114 escape = c("double", "backslash", "none"), 115 eol = "\n", 116 num_threads = readr_threads(), 117 progress = show_progress(), 118 path = deprecated(), 119 quote_escape = deprecated()) { 120 121 if (is_present(path)) { 122 deprecate_warn("1.4.0", "write_csv(path = )", "write_csv(file = )") 123 file <- path 124 } 125 126 if (is_present(quote_escape)) { 127 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 128 escape <- quote_escape 129 } 130 131 write_delim(x, file, 132 delim = ",", na = na, append = append, 133 col_names = col_names, quote = quote, escape = escape, eol = eol, num_threads = num_threads, 134 progress = progress 135 ) 136} 137 138#' @rdname write_delim 139#' @export 140write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append, 141 quote = c("needed", "all", "none"), 142 escape = c("double", "backslash", "none"), 143 eol = "\n", 144 num_threads = readr_threads(), 145 progress = show_progress(), 146 path = deprecated(), 147 quote_escape = deprecated()) { 148 if (is_present(path)) { 149 deprecate_warn("1.4.0", "write_csv2(path = )", "write_csv2(file = )") 150 file <- path 151 } 152 153 if (is_present(quote_escape)) { 154 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 155 escape <- quote_escape 156 } 157 158 x_out <- x 159 x <- change_decimal_separator(x, decimal_mark = ",") 160 write_delim(x, file, 161 delim = ";", na = na, append = append, 162 col_names = col_names, quote = quote, escape = escape, eol = eol, num_threads = num_threads, 163 progress = progress 164 ) 165 166 invisible(x_out) 167} 168 169#' @rdname write_delim 170#' @export 171write_excel_csv <- function(x, file, na = "NA", append = FALSE, 172 col_names = !append, delim = ",", 173 quote = "all", 174 escape = c("double", "backslash", "none"), 175 eol = "\n", 176 num_threads = readr_threads(), 177 progress = show_progress(), 178 path = deprecated(), 179 quote_escape = deprecated()) { 180 if (is_present(path)) { 181 deprecate_warn("1.4.0", "write_excel_csv(path = )", "write_excel_csv(file = )") 182 file <- path 183 } 184 185 if (is_present(quote_escape)) { 186 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 187 escape <- quote_escape 188 } 189 190 stopifnot(is.data.frame(x)) 191 check_column_types(x) 192 193 x_out <- x 194 datetime_cols <- vapply(x, inherits, logical(1), "POSIXt") 195 x[datetime_cols] <- lapply(x[datetime_cols], format, "%Y/%m/%d %H:%M:%S") 196 197 x[] <- lapply(x, output_column) 198 if (edition_first()) { 199 stream_delim(x, file, delim, 200 col_names = col_names, append = append, 201 na = na, bom = !append, quote_escape = escape, eol = eol 202 ) 203 return(invisible(x_out)) 204 } 205 vroom::vroom_write(x, file, delim, 206 col_names = col_names, append = append, 207 na = na, bom = !append, 208 quote = quote, escape = escape, 209 eol = eol, num_threads = num_threads, 210 progress = progress 211 ) 212 213 invisible(x_out) 214} 215 216#' @rdname write_delim 217#' @export 218write_excel_csv2 <- function(x, file, na = "NA", append = FALSE, 219 col_names = !append, delim = ";", 220 quote = "all", 221 escape = c("double", "backslash", "none"), 222 eol = "\n", 223 num_threads = readr_threads(), 224 progress = show_progress(), 225 path = deprecated(), 226 quote_escape = deprecated()) { 227 if (is_present(path)) { 228 deprecate_warn("1.4.0", "write_excel_csv2(path = )", "write_excel_csv2(file = )") 229 file <- path 230 } 231 232 if (is_present(quote_escape)) { 233 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 234 escape <- quote_escape 235 } 236 237 stopifnot(is.data.frame(x)) 238 check_column_types(x) 239 240 x_out <- x 241 x <- change_decimal_separator(x, decimal_mark = ",") 242 243 datetime_cols <- vapply(x, inherits, logical(1), "POSIXt") 244 x[datetime_cols] <- lapply(x[datetime_cols], format, "%Y/%m/%d %H:%M:%S") 245 246 x[] <- lapply(x, output_column) 247 write_excel_csv(x, file, na, append, col_names, delim, 248 quote = quote, 249 escape = escape, 250 eol = eol, num_threads = num_threads, 251 progress = progress 252 ) 253 254 invisible(x_out) 255} 256 257#' @rdname write_delim 258#' @export 259write_tsv <- function(x, file, na = "NA", append = FALSE, col_names = !append, 260 quote = "none", 261 escape = c("double", "backslash", "none"), 262 eol = "\n", 263 num_threads = readr_threads(), 264 progress = show_progress(), 265 path = deprecated(), 266 quote_escape = deprecated()) { 267 if (is_present(path)) { 268 deprecate_warn("1.4.0", "write_tsv(path = )", "write_tsv(file = )") 269 file <- path 270 } 271 272 if (is_present(quote_escape)) { 273 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 274 escape <- quote_escape 275 } 276 277 278 write_delim(x, file, 279 delim = "\t", na = na, append = append, 280 col_names = col_names, quote = quote, escape = escape, eol = eol, 281 num_threads = num_threads, progress = progress 282 ) 283} 284 285#' Convert a data frame to a delimited string 286#' 287#' These functions are equivalent to [write_csv()] etc., but instead 288#' of writing to disk, they return a string. 289#' 290#' @return A string. 291#' @inheritSection write_delim Output 292#' @inheritParams write_delim 293#' @param x A data frame. 294#' @inherit write_delim references 295#' @examples 296#' # format_()* functions are useful for testing and reprexes 297#' cat(format_csv(mtcars)) 298#' cat(format_tsv(mtcars)) 299#' cat(format_delim(mtcars, ";")) 300#' 301#' # Specifying missing values 302#' df <- data.frame(x = c(1, NA, 3)) 303#' format_csv(df, na = "missing") 304#' 305#' # Quotes are automatically added as needed 306#' df <- data.frame(x = c("a ", '"', ",", "\n")) 307#' cat(format_csv(df)) 308#' @export 309format_delim <- function(x, delim, na = "NA", append = FALSE, 310 col_names = !append, 311 quote = c("needed", "all", "none"), 312 escape = c("double", "backslash", "none"), 313 eol = "\n", 314 quote_escape = deprecated()) { 315 stopifnot(is.data.frame(x)) 316 check_column_types(x) 317 318 if (is_present(quote_escape)) { 319 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 320 escape <- quote_escape 321 } 322 323 x[] <- lapply(x, output_column) 324 if (edition_first()) { 325 res <- stream_delim(df = x, file = NULL, delim = delim, col_names = col_names, append = append, na = na, quote_escape = escape, eol = eol) 326 Encoding(res) <- "UTF-8" 327 return(res) 328 } 329 res <- vroom::vroom_format(x, delim = delim, eol = eol, col_names = col_names, na = na, quote = quote, escape = escape) 330 Encoding(res) <- "UTF-8" 331 res 332} 333 334#' @export 335#' @rdname format_delim 336format_csv <- function(x, na = "NA", append = FALSE, col_names = !append, 337 quote = c("needed", "all", "none"), 338 escape = c("double", "backslash", "none"), 339 eol = "\n", 340 quote_escape = deprecated()) { 341 if (is_present(quote_escape)) { 342 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 343 escape <- quote_escape 344 } 345 346 format_delim(x, delim = ",", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape) 347} 348 349#' @export 350#' @rdname format_delim 351format_csv2 <- function(x, na = "NA", append = FALSE, col_names = !append, 352 quote = c("needed", "all", "none"), 353 escape = c("double", "backslash", "none"), 354 eol = "\n", 355 quote_escape = deprecated()) { 356 if (is_present(quote_escape)) { 357 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 358 escape <- quote_escape 359 } 360 361 x <- change_decimal_separator(x, decimal_mark = ",") 362 format_delim(x, delim = ";", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape) 363} 364 365#' @export 366#' @rdname format_delim 367format_tsv <- function(x, na = "NA", append = FALSE, col_names = !append, 368 quote = c("needed", "all", "none"), 369 escape = c("double", "backslash", "none"), 370 eol = "\n", 371 quote_escape = deprecated()) { 372 if (is_present(quote_escape)) { 373 deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )") 374 escape <- quote_escape 375 } 376 377 format_delim(x, delim = "\t", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape) 378} 379 380#' Preprocess column for output 381#' 382#' This is a generic function that applied to each column before it is saved 383#' to disk. It provides a hook for S3 classes that need special handling. 384#' 385#' @keywords internal 386#' @param x A vector 387#' @export 388#' @examples 389#' # Most columns are not altered, but POSIXct are converted to ISO8601. 390#' x <- parse_datetime("2016-01-01") 391#' str(output_column(x)) 392output_column <- function(x, name) { 393 UseMethod("output_column") 394} 395 396#' @export 397output_column.default <- function(x, name) { 398 if (!is.object(x) || "AsIs" %in% class(x)) { 399 return(x) 400 } 401 402 as.character(x) 403} 404 405#' @export 406output_column.double <- function(x, name) { 407 x 408} 409 410#' @export 411output_column.POSIXt <- function(x, name) { 412 format(x, "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC", justify = "none") 413} 414 415stream_delim <- function(df, file, append = FALSE, bom = FALSE, ..., quote_escape, eol) { 416 quote_escape <- standardise_escape(quote_escape) 417 418 file <- standardise_path(file, input = FALSE) 419 420 if (is.null(file)) { 421 out_file <- tempfile() 422 con <- file(out_file, "wb") 423 on.exit( 424 { 425 try(close(con), silent = TRUE) 426 unlink(out_file) 427 }, 428 add = TRUE 429 ) 430 431 stream_delim_(df, con, ..., bom = bom, quote_escape = quote_escape, eol = eol) 432 close(con) 433 return(read_file(out_file)) 434 } 435 436 if (inherits(file, "connection") && !isOpen(file)) { 437 on.exit(close(file), add = TRUE) 438 if (isTRUE(append)) { 439 open(file, "ab") 440 } else { 441 open(file, "wb") 442 } 443 } 444 stream_delim_(df, file, ..., bom = bom, quote_escape = quote_escape, eol = eol) 445} 446 447change_decimal_separator <- function(x, decimal_mark = ",") { 448 stopifnot(is.data.frame(x)) 449 numeric_cols <- vapply(x, is.numeric, logical(1)) 450 451 format_seps <- function(x, decimal_mark) { 452 nas <- is.na(x) 453 x <- format(x, decimal.mark = decimal_mark, trim = TRUE, digits = 15) 454 x[nas] <- NA_character_ 455 x 456 } 457 458 x[numeric_cols] <- lapply(x[numeric_cols], format_seps, decimal_mark) 459 460 x 461} 462 463standardise_escape <- function(x) { 464 if (identical(x, FALSE)) { 465 x <- "none" 466 } 467 468 escape_types <- c("double" = 1L, "backslash" = 2L, "none" = 3L) 469 escape <- match.arg(tolower(x), names(escape_types)) 470 471 escape_types[escape] 472} 473 474check_column_types <- function(x) { 475 is_bad_column <- vapply(x, function(xx) !is.null(dim(xx)), logical(1)) 476 if (any(is_bad_column)) { 477 cli_block(type = rlang::abort, { 478 cli::cli_text("`x` must not contain list or matrix columns:") 479 cli::cli_alert_danger("invalid columns at index(s): {paste0(which(is_bad_column), collapse = '\n')}") 480 }) 481 } 482} 483