1#' Write a data frame to a delimited file
2#'
3#' The `write_*()` family of functions are an improvement to analogous function such
4#' as [write.csv()] because they are approximately twice as fast. Unlike [write.csv()],
5#' these functions do not include row names as a column in the written file.
6#' A generic function, `output_column()`, is applied to each variable
7#' to coerce columns to suitable output.
8#'
9#' @section Output:
10#' Factors are coerced to character. Doubles are formatted to a decimal string
11#' using the grisu3 algorithm. `POSIXct` values are formatted as ISO8601 with a
12#' UTC timezone *Note: `POSIXct` objects in local or non-UTC timezones will be
13#' converted to UTC time before writing.*
14#'
15#' All columns are encoded as UTF-8. `write_excel_csv()` and `write_excel_csv2()` also include a
16#' \href{https://en.wikipedia.org/wiki/Byte_order_mark}{UTF-8 Byte order mark}
17#' which indicates to Excel the csv is UTF-8 encoded.
18#'
19#' `write_excel_csv2()` and `write_csv2` were created to allow users with
20#' different locale settings to save .csv files using their default settings
21#' (e.g. `;` as the column separator and `,` as the decimal separator).
22#' This is common in some European countries.
23#'
24#' Values are only quoted if they contain a comma, quote or newline.
25#'
26#' The `write_*()` functions will automatically compress outputs if an appropriate extension is given.
27#' Three extensions are currently supported: `.gz` for gzip compression, `.bz2` for bzip2 compression and
28#' `.xz` for lzma compression.  See the examples for more information.
29#'
30#' @param x A data frame or tibble to write to disk.
31#' @param file File or connection to write to.
32#' @param append If `FALSE`, will overwrite existing file. If `TRUE`,
33#'   will append to existing file. In both cases, if the file does not exist a new
34#'   file is created.
35#' @param col_names If `FALSE`, column names will not be included at the top of the file. If `TRUE`,
36#' column names will be included. If not specified, `col_names` will take the opposite value given to `append`.
37#' @param delim Delimiter used to separate values. Defaults to `" "` for `write_delim()`, `","` for `write_excel_csv()` and
38#' `";"` for `write_excel_csv2()`. Must be a single character.
39#' @param na String used for missing values. Defaults to NA. Missing values
40#'   will never be quoted; strings with the same value as `na` will
41#'   always be quoted.
42#' @param quote_escape \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}, use the `escape` argument instead.
43#' @param eol The end of line character to use. Most commonly either `"\n"` for
44#'   Unix style newlines, or `"\r\n"` for Windows style newlines.
45#' @param path \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}, use the `file` argument instead.
46#' @return `write_*()` returns the input `x` invisibly.
47#' @inheritParams vroom::vroom_write
48#' @inheritParams read_delim
49#' @references Florian Loitsch, Printing Floating-Point Numbers Quickly and
50#' Accurately with Integers, PLDI '10,
51#' <http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>
52#' @export
53#' @examples
54#' \dontshow{
55#' .old_wd <- setwd(tempdir())
56#' }
57#' # If only a file name is specified, write_()* will write
58#' # the file to the current working directory.
59#' write_csv(mtcars, "mtcars.csv")
60#' write_tsv(mtcars, "mtcars.tsv")
61#'
62#' # If you add an extension to the file name, write_()* will
63#' # automatically compress the output.
64#' write_tsv(mtcars, "mtcars.tsv.gz")
65#' write_tsv(mtcars, "mtcars.tsv.bz2")
66#' write_tsv(mtcars, "mtcars.tsv.xz")
67#' \dontshow{
68#' setwd(.old_wd)
69#' }
70write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE,
71                        col_names = !append,
72                        quote = c("needed", "all", "none"),
73                        escape = c("double", "backslash", "none"),
74                        eol = "\n",
75                        num_threads = readr_threads(),
76                        progress = show_progress(),
77                        path = deprecated(),
78                        quote_escape = deprecated()) {
79  if (is_present(path)) {
80    deprecate_warn("1.4.0", "write_delim(path = )", "write_delim(file = )")
81    file <- path
82  }
83
84  if (is_present(quote_escape)) {
85    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
86    escape <- quote_escape
87  }
88
89  stopifnot(is.data.frame(x))
90  check_column_types(x)
91
92  x_out <- x
93  x[] <- lapply(x, output_column)
94  if (edition_first()) {
95    stream_delim(x, file,
96      delim = delim, col_names = col_names, append = append,
97      na = na, quote_escape = escape, eol = eol
98    )
99    return(invisible(x_out))
100  }
101  vroom::vroom_write(x, file,
102    delim = delim, col_names = col_names, append = append,
103    na = na, eol = eol, quote = quote, escape = escape, num_threads = num_threads,
104    progress = progress
105  )
106
107  invisible(x_out)
108}
109
110#' @rdname write_delim
111#' @export
112write_csv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
113                      quote = c("needed", "all", "none"),
114                      escape = c("double", "backslash", "none"),
115                      eol = "\n",
116                      num_threads = readr_threads(),
117                      progress = show_progress(),
118                      path = deprecated(),
119                      quote_escape = deprecated()) {
120
121  if (is_present(path)) {
122    deprecate_warn("1.4.0", "write_csv(path = )", "write_csv(file = )")
123    file <- path
124  }
125
126  if (is_present(quote_escape)) {
127    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
128    escape <- quote_escape
129  }
130
131  write_delim(x, file,
132    delim = ",", na = na, append = append,
133    col_names = col_names, quote = quote, escape = escape, eol = eol, num_threads = num_threads,
134    progress = progress
135  )
136}
137
138#' @rdname write_delim
139#' @export
140write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
141                       quote = c("needed", "all", "none"),
142                       escape = c("double", "backslash", "none"),
143                       eol = "\n",
144                       num_threads = readr_threads(),
145                       progress = show_progress(),
146                       path = deprecated(),
147                       quote_escape = deprecated()) {
148  if (is_present(path)) {
149    deprecate_warn("1.4.0", "write_csv2(path = )", "write_csv2(file = )")
150    file <- path
151  }
152
153  if (is_present(quote_escape)) {
154    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
155    escape <- quote_escape
156  }
157
158  x_out <- x
159  x <- change_decimal_separator(x, decimal_mark = ",")
160  write_delim(x, file,
161    delim = ";", na = na, append = append,
162    col_names = col_names, quote = quote, escape = escape, eol = eol, num_threads = num_threads,
163    progress = progress
164  )
165
166  invisible(x_out)
167}
168
169#' @rdname write_delim
170#' @export
171write_excel_csv <- function(x, file, na = "NA", append = FALSE,
172                            col_names = !append, delim = ",",
173                            quote = "all",
174                            escape = c("double", "backslash", "none"),
175                            eol = "\n",
176                            num_threads = readr_threads(),
177                            progress = show_progress(),
178                            path = deprecated(),
179                            quote_escape = deprecated()) {
180  if (is_present(path)) {
181    deprecate_warn("1.4.0", "write_excel_csv(path = )", "write_excel_csv(file = )")
182    file <- path
183  }
184
185  if (is_present(quote_escape)) {
186    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
187    escape <- quote_escape
188  }
189
190  stopifnot(is.data.frame(x))
191  check_column_types(x)
192
193  x_out <- x
194  datetime_cols <- vapply(x, inherits, logical(1), "POSIXt")
195  x[datetime_cols] <- lapply(x[datetime_cols], format, "%Y/%m/%d %H:%M:%S")
196
197  x[] <- lapply(x, output_column)
198  if (edition_first()) {
199    stream_delim(x, file, delim,
200      col_names = col_names, append = append,
201      na = na, bom = !append, quote_escape = escape, eol = eol
202    )
203    return(invisible(x_out))
204  }
205  vroom::vroom_write(x, file, delim,
206    col_names = col_names, append = append,
207    na = na, bom = !append,
208    quote = quote, escape = escape,
209    eol = eol, num_threads = num_threads,
210    progress = progress
211  )
212
213  invisible(x_out)
214}
215
216#' @rdname write_delim
217#' @export
218write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
219                             col_names = !append, delim = ";",
220                             quote = "all",
221                             escape = c("double", "backslash", "none"),
222                             eol = "\n",
223                             num_threads = readr_threads(),
224                             progress = show_progress(),
225                             path = deprecated(),
226                             quote_escape = deprecated()) {
227  if (is_present(path)) {
228    deprecate_warn("1.4.0", "write_excel_csv2(path = )", "write_excel_csv2(file = )")
229    file <- path
230  }
231
232  if (is_present(quote_escape)) {
233    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
234    escape <- quote_escape
235  }
236
237  stopifnot(is.data.frame(x))
238  check_column_types(x)
239
240  x_out <- x
241  x <- change_decimal_separator(x, decimal_mark = ",")
242
243  datetime_cols <- vapply(x, inherits, logical(1), "POSIXt")
244  x[datetime_cols] <- lapply(x[datetime_cols], format, "%Y/%m/%d %H:%M:%S")
245
246  x[] <- lapply(x, output_column)
247  write_excel_csv(x, file, na, append, col_names, delim,
248    quote = quote,
249    escape = escape,
250    eol = eol, num_threads = num_threads,
251    progress = progress
252  )
253
254  invisible(x_out)
255}
256
257#' @rdname write_delim
258#' @export
259write_tsv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
260                      quote = "none",
261                      escape = c("double", "backslash", "none"),
262                      eol = "\n",
263                      num_threads = readr_threads(),
264                      progress = show_progress(),
265                      path = deprecated(),
266                      quote_escape = deprecated()) {
267  if (is_present(path)) {
268    deprecate_warn("1.4.0", "write_tsv(path = )", "write_tsv(file = )")
269    file <- path
270  }
271
272  if (is_present(quote_escape)) {
273    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
274    escape <- quote_escape
275  }
276
277
278  write_delim(x, file,
279    delim = "\t", na = na, append = append,
280    col_names = col_names, quote = quote, escape = escape, eol = eol,
281    num_threads = num_threads, progress = progress
282  )
283}
284
285#' Convert a data frame to a delimited string
286#'
287#' These functions are equivalent to [write_csv()] etc., but instead
288#' of writing to disk, they return a string.
289#'
290#' @return A string.
291#' @inheritSection write_delim Output
292#' @inheritParams write_delim
293#' @param x A data frame.
294#' @inherit write_delim references
295#' @examples
296#' # format_()* functions are useful for testing and reprexes
297#' cat(format_csv(mtcars))
298#' cat(format_tsv(mtcars))
299#' cat(format_delim(mtcars, ";"))
300#'
301#' # Specifying missing values
302#' df <- data.frame(x = c(1, NA, 3))
303#' format_csv(df, na = "missing")
304#'
305#' # Quotes are automatically added as needed
306#' df <- data.frame(x = c("a ", '"', ",", "\n"))
307#' cat(format_csv(df))
308#' @export
309format_delim <- function(x, delim, na = "NA", append = FALSE,
310                         col_names = !append,
311                         quote = c("needed", "all", "none"),
312                         escape = c("double", "backslash", "none"),
313                         eol = "\n",
314                         quote_escape = deprecated()) {
315  stopifnot(is.data.frame(x))
316  check_column_types(x)
317
318  if (is_present(quote_escape)) {
319    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
320    escape <- quote_escape
321  }
322
323  x[] <- lapply(x, output_column)
324  if (edition_first()) {
325    res <- stream_delim(df = x, file = NULL, delim = delim, col_names = col_names, append = append, na = na, quote_escape = escape, eol = eol)
326    Encoding(res) <- "UTF-8"
327    return(res)
328  }
329  res <- vroom::vroom_format(x, delim = delim, eol = eol, col_names = col_names, na = na, quote = quote, escape = escape)
330  Encoding(res) <- "UTF-8"
331  res
332}
333
334#' @export
335#' @rdname format_delim
336format_csv <- function(x, na = "NA", append = FALSE, col_names = !append,
337  quote = c("needed", "all", "none"),
338  escape = c("double", "backslash", "none"),
339  eol = "\n",
340  quote_escape = deprecated()) {
341  if (is_present(quote_escape)) {
342    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
343    escape <- quote_escape
344  }
345
346  format_delim(x, delim = ",", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape)
347}
348
349#' @export
350#' @rdname format_delim
351format_csv2 <- function(x, na = "NA", append = FALSE, col_names = !append,
352  quote = c("needed", "all", "none"),
353  escape = c("double", "backslash", "none"),
354  eol = "\n",
355  quote_escape = deprecated()) {
356  if (is_present(quote_escape)) {
357    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
358    escape <- quote_escape
359  }
360
361  x <- change_decimal_separator(x, decimal_mark = ",")
362  format_delim(x, delim = ";", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape)
363}
364
365#' @export
366#' @rdname format_delim
367format_tsv <- function(x, na = "NA", append = FALSE, col_names = !append,
368  quote = c("needed", "all", "none"),
369  escape = c("double", "backslash", "none"),
370  eol = "\n",
371  quote_escape = deprecated()) {
372  if (is_present(quote_escape)) {
373    deprecate_soft("2.0.0", "write_delim(quote_escape = )", "write_delim(escape = )")
374    escape <- quote_escape
375  }
376
377  format_delim(x, delim = "\t", na = na, append = append, col_names = col_names, eol = eol, quote = quote, escape = escape)
378}
379
380#' Preprocess column for output
381#'
382#' This is a generic function that applied to each column before it is saved
383#' to disk. It provides a hook for S3 classes that need special handling.
384#'
385#' @keywords internal
386#' @param x A vector
387#' @export
388#' @examples
389#' # Most columns are not altered, but POSIXct are converted to ISO8601.
390#' x <- parse_datetime("2016-01-01")
391#' str(output_column(x))
392output_column <- function(x, name) {
393  UseMethod("output_column")
394}
395
396#' @export
397output_column.default <- function(x, name) {
398  if (!is.object(x) || "AsIs" %in% class(x)) {
399    return(x)
400  }
401
402  as.character(x)
403}
404
405#' @export
406output_column.double <- function(x, name) {
407  x
408}
409
410#' @export
411output_column.POSIXt <- function(x, name) {
412  format(x, "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC", justify = "none")
413}
414
415stream_delim <- function(df, file, append = FALSE, bom = FALSE, ..., quote_escape, eol) {
416  quote_escape <- standardise_escape(quote_escape)
417
418  file <- standardise_path(file, input = FALSE)
419
420  if (is.null(file)) {
421    out_file <- tempfile()
422    con <- file(out_file, "wb")
423    on.exit(
424      {
425        try(close(con), silent = TRUE)
426        unlink(out_file)
427      },
428      add = TRUE
429    )
430
431    stream_delim_(df, con, ..., bom = bom, quote_escape = quote_escape, eol = eol)
432    close(con)
433    return(read_file(out_file))
434  }
435
436  if (inherits(file, "connection") && !isOpen(file)) {
437    on.exit(close(file), add = TRUE)
438    if (isTRUE(append)) {
439      open(file, "ab")
440    } else {
441      open(file, "wb")
442    }
443  }
444  stream_delim_(df, file, ..., bom = bom, quote_escape = quote_escape, eol = eol)
445}
446
447change_decimal_separator <- function(x, decimal_mark = ",") {
448  stopifnot(is.data.frame(x))
449  numeric_cols <- vapply(x, is.numeric, logical(1))
450
451  format_seps <- function(x, decimal_mark) {
452    nas <- is.na(x)
453    x <- format(x, decimal.mark = decimal_mark, trim = TRUE, digits = 15)
454    x[nas] <- NA_character_
455    x
456  }
457
458  x[numeric_cols] <- lapply(x[numeric_cols], format_seps, decimal_mark)
459
460  x
461}
462
463standardise_escape <- function(x) {
464  if (identical(x, FALSE)) {
465    x <- "none"
466  }
467
468  escape_types <- c("double" = 1L, "backslash" = 2L, "none" = 3L)
469  escape <- match.arg(tolower(x), names(escape_types))
470
471  escape_types[escape]
472}
473
474check_column_types <- function(x) {
475  is_bad_column <- vapply(x, function(xx) !is.null(dim(xx)), logical(1))
476  if (any(is_bad_column)) {
477    cli_block(type = rlang::abort, {
478      cli::cli_text("`x` must not contain list or matrix columns:")
479      cli::cli_alert_danger("invalid columns at index(s): {paste0(which(is_bad_column), collapse = '\n')}")
480    })
481  }
482}
483