1% Generated by roxygen2: do not edit by hand 2% Please edit documentation in R/read_excel.R 3\name{read_excel} 4\alias{read_excel} 5\alias{read_xls} 6\alias{read_xlsx} 7\title{Read xls and xlsx files} 8\usage{ 9read_excel(path, sheet = NULL, range = NULL, col_names = TRUE, 10 col_types = NULL, na = "", trim_ws = TRUE, skip = 0, 11 n_max = Inf, guess_max = min(1000, n_max), 12 progress = readxl_progress(), .name_repair = "unique") 13 14read_xls(path, sheet = NULL, range = NULL, col_names = TRUE, 15 col_types = NULL, na = "", trim_ws = TRUE, skip = 0, 16 n_max = Inf, guess_max = min(1000, n_max), 17 progress = readxl_progress(), .name_repair = "unique") 18 19read_xlsx(path, sheet = NULL, range = NULL, col_names = TRUE, 20 col_types = NULL, na = "", trim_ws = TRUE, skip = 0, 21 n_max = Inf, guess_max = min(1000, n_max), 22 progress = readxl_progress(), .name_repair = "unique") 23} 24\arguments{ 25\item{path}{Path to the xls/xlsx file.} 26 27\item{sheet}{Sheet to read. Either a string (the name of a sheet), or an 28integer (the position of the sheet). Ignored if the sheet is specified via 29\code{range}. If neither argument specifies the sheet, defaults to the first 30sheet.} 31 32\item{range}{A cell range to read from, as described in \link{cell-specification}. 33Includes typical Excel ranges like "B3:D87", possibly including the sheet 34name like "Budget!B2:G14", and more. Interpreted strictly, even if the 35range forces the inclusion of leading or trailing empty rows or columns. 36Takes precedence over \code{skip}, \code{n_max} and \code{sheet}.} 37 38\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE} to get 39default names, or a character vector giving a name for each column. If user 40provides \code{col_types} as a vector, \code{col_names} can have one entry per 41column, i.e. have the same length as \code{col_types}, or one entry per 42unskipped column.} 43 44\item{col_types}{Either \code{NULL} to guess all from the spreadsheet or a 45character vector containing one entry per column from these options: 46"skip", "guess", "logical", "numeric", "date", "text" or "list". If exactly 47one \code{col_type} is specified, it will be recycled. The content of a cell in 48a skipped column is never read and that column will not appear in the data 49frame output. A list cell loads a column as a list of length 1 vectors, 50which are typed using the type guessing logic from \code{col_types = NULL}, but 51on a cell-by-cell basis.} 52 53\item{na}{Character vector of strings to interpret as missing values. By 54default, readxl treats blank cells as missing data.} 55 56\item{trim_ws}{Should leading and trailing whitespace be trimmed?} 57 58\item{skip}{Minimum number of rows to skip before reading anything, be it 59column names or data. Leading empty rows are automatically skipped, so this 60is a lower bound. Ignored if \code{range} is given.} 61 62\item{n_max}{Maximum number of data rows to read. Trailing empty rows are 63automatically skipped, so this is an upper bound on the number of rows in 64the returned tibble. Ignored if \code{range} is given.} 65 66\item{guess_max}{Maximum number of data rows to use for guessing column 67types.} 68 69\item{progress}{Display a progress spinner? By default, the spinner appears 70only in an interactive session, outside the context of knitting a document, 71and when the call is likely to run for several seconds or more. See 72\code{\link[=readxl_progress]{readxl_progress()}} for more details.} 73 74\item{.name_repair}{Handling of column names. By default, readxl ensures 75column names are not empty and are unique. If the tibble package version is 76recent enough, there is full support for \code{.name_repair} as documented in 77\code{\link[tibble:tibble]{tibble::tibble()}}. If an older version of tibble is present, readxl falls 78back to name repair in the style of tibble v1.4.2.} 79} 80\value{ 81A \link[tibble:tibble-package]{tibble} 82} 83\description{ 84Read xls and xlsx files 85 86\code{read_excel()} calls \code{\link[=excel_format]{excel_format()}} to determine if \code{path} is xls or xlsx, 87based on the file extension and the file itself, in that order. Use 88\code{read_xls()} and \code{read_xlsx()} directly if you know better and want to 89prevent such guessing. 90} 91\examples{ 92datasets <- readxl_example("datasets.xlsx") 93read_excel(datasets) 94 95# Specify sheet either by position or by name 96read_excel(datasets, 2) 97read_excel(datasets, "mtcars") 98 99# Skip rows and use default column names 100read_excel(datasets, skip = 148, col_names = FALSE) 101 102# Recycle a single column type 103read_excel(datasets, col_types = "text") 104 105# Specify some col_types and guess others 106read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess")) 107 108# Accomodate a column with disparate types via col_type = "list" 109df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list")) 110df 111df$value 112sapply(df$value, class) 113 114# Limit the number of data rows read 115read_excel(datasets, n_max = 3) 116 117# Read from an Excel range using A1 or R1C1 notation 118read_excel(datasets, range = "C1:E7") 119read_excel(datasets, range = "R1C2:R2C5") 120 121# Specify the sheet as part of the range 122read_excel(datasets, range = "mtcars!B1:D5") 123 124# Read only specific rows or columns 125read_excel(datasets, range = cell_rows(102:151), col_names = FALSE) 126read_excel(datasets, range = cell_cols("B:D")) 127 128# Get a preview of column names 129names(read_excel(readxl_example("datasets.xlsx"), n_max = 0)) 130 131if (utils::packageVersion("tibble") > "1.4.2") { 132 ## exploit full .name_repair flexibility from tibble 133 134 ## "universal" names are unique and syntactic 135 read_excel( 136 readxl_example("deaths.xlsx"), 137 range = "arts!A5:F15", 138 .name_repair = "universal" 139 ) 140 141 ## specify name repair as a built-in function 142 read_excel(readxl_example("clippy.xlsx"), .name_repair = toupper) 143 144 ## specify name repair as a custom function 145 my_custom_name_repair <- function(nms) tolower(gsub("[.]", "_", nms)) 146 read_excel( 147 readxl_example("datasets.xlsx"), 148 .name_repair = my_custom_name_repair 149 ) 150 151 ## specify name repair as an anonymous function 152 read_excel( 153 readxl_example("datasets.xlsx"), 154 sheet = "chickwts", 155 .name_repair = ~ substr(.x, start = 1, stop = 3) 156 ) 157} 158} 159\seealso{ 160\link{cell-specification} for more details on targetting cells with the 161\code{range} argument 162} 163