1% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/read_excel.R
3\name{read_excel}
4\alias{read_excel}
5\alias{read_xls}
6\alias{read_xlsx}
7\title{Read xls and xlsx files}
8\usage{
9read_excel(path, sheet = NULL, range = NULL, col_names = TRUE,
10  col_types = NULL, na = "", trim_ws = TRUE, skip = 0,
11  n_max = Inf, guess_max = min(1000, n_max),
12  progress = readxl_progress(), .name_repair = "unique")
13
14read_xls(path, sheet = NULL, range = NULL, col_names = TRUE,
15  col_types = NULL, na = "", trim_ws = TRUE, skip = 0,
16  n_max = Inf, guess_max = min(1000, n_max),
17  progress = readxl_progress(), .name_repair = "unique")
18
19read_xlsx(path, sheet = NULL, range = NULL, col_names = TRUE,
20  col_types = NULL, na = "", trim_ws = TRUE, skip = 0,
21  n_max = Inf, guess_max = min(1000, n_max),
22  progress = readxl_progress(), .name_repair = "unique")
23}
24\arguments{
25\item{path}{Path to the xls/xlsx file.}
26
27\item{sheet}{Sheet to read. Either a string (the name of a sheet), or an
28integer (the position of the sheet). Ignored if the sheet is specified via
29\code{range}. If neither argument specifies the sheet, defaults to the first
30sheet.}
31
32\item{range}{A cell range to read from, as described in \link{cell-specification}.
33Includes typical Excel ranges like "B3:D87", possibly including the sheet
34name like "Budget!B2:G14", and more. Interpreted strictly, even if the
35range forces the inclusion of leading or trailing empty rows or columns.
36Takes precedence over \code{skip}, \code{n_max} and \code{sheet}.}
37
38\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE} to get
39default names, or a character vector giving a name for each column. If user
40provides \code{col_types} as a vector, \code{col_names} can have one entry per
41column, i.e. have the same length as \code{col_types}, or one entry per
42unskipped column.}
43
44\item{col_types}{Either \code{NULL} to guess all from the spreadsheet or a
45character vector containing one entry per column from these options:
46"skip", "guess", "logical", "numeric", "date", "text" or "list". If exactly
47one \code{col_type} is specified, it will be recycled. The content of a cell in
48a skipped column is never read and that column will not appear in the data
49frame output. A list cell loads a column as a list of length 1 vectors,
50which are typed using the type guessing logic from \code{col_types = NULL}, but
51on a cell-by-cell basis.}
52
53\item{na}{Character vector of strings to interpret as missing values. By
54default, readxl treats blank cells as missing data.}
55
56\item{trim_ws}{Should leading and trailing whitespace be trimmed?}
57
58\item{skip}{Minimum number of rows to skip before reading anything, be it
59column names or data. Leading empty rows are automatically skipped, so this
60is a lower bound. Ignored if \code{range} is given.}
61
62\item{n_max}{Maximum number of data rows to read. Trailing empty rows are
63automatically skipped, so this is an upper bound on the number of rows in
64the returned tibble. Ignored if \code{range} is given.}
65
66\item{guess_max}{Maximum number of data rows to use for guessing column
67types.}
68
69\item{progress}{Display a progress spinner? By default, the spinner appears
70only in an interactive session, outside the context of knitting a document,
71and when the call is likely to run for several seconds or more. See
72\code{\link[=readxl_progress]{readxl_progress()}} for more details.}
73
74\item{.name_repair}{Handling of column names. By default, readxl ensures
75column names are not empty and are unique. If the tibble package version is
76recent enough, there is full support for \code{.name_repair} as documented in
77\code{\link[tibble:tibble]{tibble::tibble()}}. If an older version of tibble is present, readxl falls
78back to name repair in the style of tibble v1.4.2.}
79}
80\value{
81A \link[tibble:tibble-package]{tibble}
82}
83\description{
84Read xls and xlsx files
85
86\code{read_excel()} calls \code{\link[=excel_format]{excel_format()}} to determine if \code{path} is xls or xlsx,
87based on the file extension and the file itself, in that order. Use
88\code{read_xls()} and \code{read_xlsx()} directly if you know better and want to
89prevent such guessing.
90}
91\examples{
92datasets <- readxl_example("datasets.xlsx")
93read_excel(datasets)
94
95# Specify sheet either by position or by name
96read_excel(datasets, 2)
97read_excel(datasets, "mtcars")
98
99# Skip rows and use default column names
100read_excel(datasets, skip = 148, col_names = FALSE)
101
102# Recycle a single column type
103read_excel(datasets, col_types = "text")
104
105# Specify some col_types and guess others
106read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess"))
107
108# Accomodate a column with disparate types via col_type = "list"
109df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list"))
110df
111df$value
112sapply(df$value, class)
113
114# Limit the number of data rows read
115read_excel(datasets, n_max = 3)
116
117# Read from an Excel range using A1 or R1C1 notation
118read_excel(datasets, range = "C1:E7")
119read_excel(datasets, range = "R1C2:R2C5")
120
121# Specify the sheet as part of the range
122read_excel(datasets, range = "mtcars!B1:D5")
123
124# Read only specific rows or columns
125read_excel(datasets, range = cell_rows(102:151), col_names = FALSE)
126read_excel(datasets, range = cell_cols("B:D"))
127
128# Get a preview of column names
129names(read_excel(readxl_example("datasets.xlsx"), n_max = 0))
130
131if (utils::packageVersion("tibble") > "1.4.2") {
132  ## exploit full .name_repair flexibility from tibble
133
134  ## "universal" names are unique and syntactic
135  read_excel(
136    readxl_example("deaths.xlsx"),
137    range = "arts!A5:F15",
138    .name_repair = "universal"
139  )
140
141  ## specify name repair as a built-in function
142  read_excel(readxl_example("clippy.xlsx"), .name_repair = toupper)
143
144  ## specify name repair as a custom function
145  my_custom_name_repair <- function(nms) tolower(gsub("[.]", "_", nms))
146  read_excel(
147    readxl_example("datasets.xlsx"),
148    .name_repair = my_custom_name_repair
149  )
150
151  ## specify name repair as an anonymous function
152  read_excel(
153    readxl_example("datasets.xlsx"),
154    sheet = "chickwts",
155    .name_repair = ~ substr(.x, start = 1, stop = 3)
156  )
157}
158}
159\seealso{
160\link{cell-specification} for more details on targetting cells with the
161\code{range} argument
162}
163