1% File src/library/utils/man/read.DIF.Rd
2% Part of the R package, https://www.R-project.org
3% Copyright 1995-2014 R Core Team
4% Distributed under GPL 2 or later
5
6\name{read.DIF}
7\alias{read.DIF}
8\title{Data Input from Spreadsheet}
9\description{
10  Reads a file in Data Interchange Format (DIF) and creates a data frame
11  from it.  DIF is a format for data matrices such as single spreadsheets.
12}
13\usage{
14read.DIF(file, header = FALSE,
15         dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
16         row.names, col.names, as.is = !stringsAsFactors,
17         na.strings = "NA", colClasses = NA, nrows = -1,
18         skip = 0, check.names = TRUE, blank.lines.skip = TRUE,
19         stringsAsFactors = FALSE,
20         transpose = FALSE, fileEncoding = "")
21}
22\arguments{
23  \item{file}{the name of the file which the data are to be read from,
24    or a \link{connection}, or a complete URL.
25
26    The name \code{"clipboard"} may also be used on Windows, in which
27    case \code{read.DIF("clipboard")} will look for a DIF format entry
28    in the Windows clipboard.
29  }
30
31  \item{header}{a logical value indicating whether the spreadsheet contains the
32    names of the variables as its first line.  If missing, the value is
33    determined from the file format: \code{header} is set to \code{TRUE}
34    if and only if the first row contains only character values and
35    the top left cell is empty.}
36
37  \item{dec}{the character used in the file for decimal points.}
38
39  \item{numerals}{string indicating how to convert numbers whose conversion
40    to double precision would lose accuracy; see \code{\link{type.convert}}.}
41
42  \item{row.names}{a vector of row names.  This can be a vector giving
43    the actual row names, or a single number giving the column of the
44    table which contains the row names, or a character string giving the
45    name of the table column containing the row names.
46
47    If there is a header and the first row contains one fewer field than
48    the number of columns, the first column in the input is used for the
49    row names.  Otherwise if \code{row.names} is missing, the rows are
50    numbered.
51
52    Using \code{row.names = NULL} forces row numbering.
53  }
54
55  \item{col.names}{a vector of optional names for the variables.
56    The default is to use \code{"V"} followed by the column number.}
57
58  \item{as.is}{controls conversion of character variables (insofar as
59    they are not converted to logical, numeric or complex) to factors,
60    if not otherwise specified by \code{colClasses}.
61    Its value is either a vector of logicals (values are recycled if
62    necessary), or a vector of numeric or character indices which
63    specify which columns should not be converted to factors.
64
65    Note: In releases prior to \R{} 2.12.1, cells marked as being of
66    character type were converted to logical, numeric or complex using
67    \code{\link{type.convert}} as in \code{\link{read.table}}.
68
69    Note: to suppress all conversions including those of numeric
70    columns, set \code{colClasses = "character"}.
71
72    Note that \code{as.is} is specified per column (not per
73    variable) and so includes the column of row names (if any) and any
74    columns to be skipped.
75  }
76
77  \item{na.strings}{a character vector of strings which are to be
78    interpreted as \code{\link{NA}} values.  Blank fields are also
79    considered to be missing values in logical, integer, numeric and
80    complex fields.}
81
82  \item{colClasses}{character.  A vector of classes to be assumed for
83    the columns.  Recycled as necessary, or if the character vector is
84    named, unspecified values are taken to be \code{NA}.
85
86    Possible values are \code{NA} (when \code{\link{type.convert}} is
87    used), \code{"NULL"} (when the column is skipped), one of the atomic
88    vector classes (logical, integer, numeric, complex, character, raw),
89    or \code{"factor"}, \code{"Date"} or \code{"POSIXct"}.  Otherwise
90    there needs to be an \code{as} method (from package \pkg{methods})
91    for conversion from \code{"character"} to the specified formal
92    class.
93
94    Note that \code{colClasses} is specified per column (not per
95    variable) and so includes the column of row names (if any).
96  }
97
98  \item{nrows}{the maximum number of rows to read in.  Negative values
99    are ignored.}
100
101  \item{skip}{the number of lines of the data file to skip before
102    beginning to read data.}
103
104  \item{check.names}{logical.  If \code{TRUE} then the names of the
105    variables in the data frame are checked to ensure that they are
106    syntactically valid variable names.  If necessary they are adjusted
107    (by \code{\link{make.names}}) so that they are, and also to ensure
108    that there are no duplicates.}
109
110  \item{blank.lines.skip}{logical: if \code{TRUE} blank lines in the
111    input are ignored.}
112
113  \item{stringsAsFactors}{logical: should character vectors be converted
114    to factors?}
115
116  \item{transpose}{logical, indicating if the row and column
117    interpretation should be transposed.  Microsoft's Excel has been
118    known to produce DIF files that do not conform to the standard and
119    need \code{transpose = TRUE} to be read correctly.}
120
121  \item{fileEncoding}{character string: if non-empty declares the
122    encoding used on a file (not a connection or clipboard) so the
123    character data can be re-encoded.  See the \sQuote{Encoding} section
124    of the help for \code{\link{file}}, the \sQuote{R Data Import/Export}
125    manual and \sQuote{Note}.}
126}
127\value{
128  A data frame (\code{\link{data.frame}}) containing a representation of
129  the data in the file.  Empty input is an error unless \code{col.names}
130  is specified, in which case a 0-row data frame is returned; similarly,
131  input with just a header line and \code{header = TRUE} gives a 0-row data frame.
132}
133
134\note{
135  The columns referred to in \code{as.is} and \code{colClasses} include
136  the column of row names (if any).
137
138  Less memory will be used if \code{colClasses} is specified as one of
139  the six atomic vector classes.
140}
141\author{R Core; \code{transpose} option by Christoph Buser, ETH Zurich}
142\seealso{
143  The \emph{R Data Import/Export} manual.
144
145  \code{\link{scan}}, \code{\link{type.convert}},
146  \code{\link{read.fwf}} for reading \emph{f}ixed \emph{w}idth
147  \emph{f}ormatted input;
148  \code{\link{read.table}};
149  \code{\link{data.frame}}.
150}
151\references{
152  The DIF format specification can be found by searching on
153  \url{http://www.wotsit.org/}; the optional header fields are ignored.
154  See also
155  \url{https://en.wikipedia.org/wiki/Data_Interchange_Format}.
156
157  The term is likely to lead to confusion: Windows will have a
158  \sQuote{Windows Data Interchange Format (DIF) data format} as part of
159  its WinFX system, which may or may not be compatible.
160}
161\examples{
162## read.DIF() may need transpose = TRUE for a file exported from Excel
163udir <- system.file("misc", package = "utils")
164dd <- read.DIF(file.path(udir, "exDIF.dif"), header = TRUE, transpose = TRUE)
165dc <- read.csv(file.path(udir, "exDIF.csv"), header = TRUE)
166stopifnot(identical(dd, dc), dim(dd) == c(4,2))
167}
168\keyword{file}
169\keyword{connection}
170