1#  File src/library/base/R/dataframe.R
2#  Part of the R package, https://www.R-project.org
3#
4#  This program is free software; you can redistribute it and/or modify
5#  it under the terms of the GNU General Public License as published by
6#  the Free Software Foundation; either version 2 of the License, or
7#  (at your option) any later version.
8#
9#  This program is distributed in the hope that it will be useful,
10#  but WITHOUT ANY WARRANTY; without even the implied warranty of
11#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12#  GNU General Public License for more details.
13#
14#  A copy of the GNU General Public License is available at
15#  https://www.R-project.org/Licenses/
16
17# Statlib code by John Chambers, Bell Labs, 1994
18# Changes Copyright (C) 1998-2021 The R Core Team
19
20
21## As from R 2.4.0, row.names can be either character or integer.
22## row.names() will always return character.
23## attr(, "row.names") will return either character or integer.
24##
25## Do not assume that the internal representation is either, since
26## 1L:n is stored as the integer vector c(NA, n) to save space (and
27## the C-level code to get/set the attribute makes the appropriate
28## translations.
29##
30## As from 2.5.0 c(NA, n > 0) indicates deliberately assigned row names,
31## and c(NA, n < 0) automatic row names.
32
33## We cannot allow long vectors as elements until we can handle
34## duplication of row names.
35
36.row_names_info <- function(x, type = 1L)
37    .Internal(shortRowNames(x, type))
38
39row.names <- function(x) UseMethod("row.names")
40row.names.data.frame <- function(x) as.character(attr(x, "row.names"))
41row.names.default <- function(x) if(!is.null(dim(x))) rownames(x)# else NULL
42
43.set_row_names <- function(n)
44    if(n > 0) c(NA_integer_, -n) else integer()
45
46
47##_H Hack around the fact that other packages fail with a newly improved `row.names<-`:
48##_H
49##_H `row.names<-` <- function(x, make.names = FALSE, value) UseMethod("row.names<-")
50`row.names<-` <- function(x, value) UseMethod("row.names<-")
51
52##_H `row.names<-.data.frame` <-
53`.rowNamesDF<-` <- function(x, make.names = FALSE, value)
54{
55    if (!is.data.frame(x)) x <- as.data.frame(x)
56    n <- .row_names_info(x, 2L)
57    if(is.null(value)) { # set automatic row.names
58        attr(x, "row.names") <- .set_row_names(n)
59        return(x)
60    }
61    ## do this here, as e.g. POSIXlt changes length when coerced.
62    if( is.object(value) || !is.integer(value) )
63        value <- as.character(value)
64    if(n == 0L) {
65        ## we have to be careful here.  This could be a
66        ## 0-row data frame or an invalid one being constructed.
67        if(!is.null(attr(x, "row.names")) && length(value) > 0L)
68           stop("invalid 'row.names' length")
69    }
70    else if (length(value) != n) {
71	if(isFALSE(make.names)) stop("invalid 'row.names' length")
72        else if(is.na(make.names)) { # automatic row.names
73            attr(x, "row.names") <- .set_row_names(n)
74            return(x)
75        }
76        else if(!isTRUE(make.names)) stop("invalid 'make.names'")
77        ## else  make.names = TRUE: amend 'value' to correct ones:
78        else if((nv <- length(value)) < n)
79            value <- c(value, rep_len(value[nv], n-nv))
80        else # length(value) > n
81            value <- value[seq_len(n)]
82    }
83    if (anyDuplicated(value)) {
84        if(isFALSE(make.names)) {
85            nonuniq <- sort(unique(value[duplicated(value)]))
86            ## warning + stop ?? FIXME: s/warning/stop/ and drop (2nd) stop ??
87            warning(ngettext(length(nonuniq),
88                             sprintf("non-unique value when setting 'row.names': %s",
89                                     sQuote(nonuniq[1L])),
90                             sprintf("non-unique values when setting 'row.names': %s",
91                                     paste(sQuote(nonuniq), collapse = ", "))),
92                domain = NA, call. = FALSE)
93            stop("duplicate 'row.names' are not allowed")
94        }
95        else if(is.na(make.names)) { # automatic row.names
96            value <- .set_row_names( # find nrow(.) in case 'n' is not usable:
97                if(n == 0L && is.null(.row_names_info(x, 0L)) && length(x) > 0L)
98                    length(x[[1L]])
99                else n)
100        }
101        else if(!isTRUE(make.names)) stop("invalid 'make.names'")
102        else # make.names = TRUE: amend 'value' to correct ones:
103            value <- make.names(value, unique=TRUE)
104        ## NB: 'value' is now guaranteed to have no NA's ==> can use 'else if' :
105    }
106    else if (anyNA(value)) {
107        if(isFALSE(make.names))
108            stop("missing values in 'row.names' are not allowed")
109        if(is.na(make.names)) # automatic row.names
110            value <- .set_row_names(n)
111        else if(!isTRUE(make.names)) stop("invalid 'make.names'")
112        else # make.names = TRUE: amend 'value' to correct ones:
113            value <- make.names(value, unique=TRUE)
114    }
115    attr(x, "row.names") <- value
116    x
117}
118
119`row.names<-.data.frame` <- function(x, value) `.rowNamesDF<-`(x, value=value)
120
121##_H `row.names<-.default` <- function(x, ..., value) `rownames<-`(x, value)
122`row.names<-.default` <- function(x, value) `rownames<-`(x, value)
123
124is.na.data.frame <- function (x)
125{
126    ## need to special-case no columns
127    y <- if (length(x)) {
128        do.call("cbind", lapply(x, "is.na")) # gives a matrix
129    } else matrix(FALSE, length(row.names(x)), 0)
130    if(.row_names_info(x) > 0L) rownames(y) <- row.names(x)
131    y
132}
133
134## Provide for efficiency reasons (PR#17600):
135anyNA.data.frame <- function(x, recursive = FALSE)
136    any(vapply(x, anyNA, NA, USE.NAMES = FALSE))
137
138is.data.frame <- function(x) inherits(x, "data.frame")
139
140## as fast as possible; used also for subsetting
141I <- function(x) { class(x) <- unique.default(c("AsIs", oldClass(x))); x }
142
143print.AsIs <- function (x, ...)
144{
145    cl <- oldClass(x)
146    oldClass(x) <- cl[cl != "AsIs"]
147    NextMethod("print")
148    invisible(x)
149}
150
151
152t.data.frame <- function(x)
153{
154    x <- as.matrix(x)
155    NextMethod("t")
156}
157
158dim.data.frame <- function(x) c(.row_names_info(x, 2L), length(x))
159
160dimnames.data.frame <- function(x) list(row.names(x), names(x))
161
162`dimnames<-.data.frame` <- function(x, value)
163{
164    d <- dim(x)
165    if(!is.list(value) || length(value) != 2L)
166	stop("invalid 'dimnames' given for data frame")
167    ## do the coercion first, as might change length
168    value[[1L]] <- as.character(value[[1L]])
169    value[[2L]] <- as.character(value[[2L]])
170    if(d[[1L]] != length(value[[1L]]) || d[[2L]] != length(value[[2L]]))
171	stop("invalid 'dimnames' given for data frame")
172    row.names(x) <- value[[1L]] # checks validity
173    names(x) <- value[[2L]]
174    x
175}
176
177as.data.frame <- function(x, row.names = NULL, optional = FALSE, ...)
178{
179    if(is.null(x))			# can't assign class to NULL
180	return(as.data.frame(list()))
181    UseMethod("as.data.frame")
182}
183
184as.data.frame.default <- function(x, ...)
185    stop(gettextf("cannot coerce class %s to a data.frame",
186                  sQuote(deparse(class(x))[1L])),
187         domain = NA)
188
189###  Here are methods ensuring that the arguments to "data.frame"
190###  are in a form suitable for combining into a data frame.
191
192as.data.frame.data.frame <- function(x, row.names = NULL, ...)
193{
194    cl <- oldClass(x)
195    i <- match("data.frame", cl)
196    if(i > 1L)
197	class(x) <- cl[ - (1L:(i-1L))]
198    if(!is.null(row.names)){
199        nr <- .row_names_info(x, 2L)
200	if(length(row.names) == nr)
201	    attr(x, "row.names") <- row.names
202	else
203            stop(sprintf(ngettext(nr,
204                                  "invalid 'row.names', length %d for a data frame with %d row",
205                                  "invalid 'row.names', length %d for a data frame with %d rows"),
206                         length(row.names), nr), domain = NA)
207    }
208    x
209}
210
211## prior to 1.8.0 this coerced names - PR#3280
212as.data.frame.list <-
213    function(x, row.names = NULL, optional = FALSE, ...,
214	     cut.names = FALSE, col.names = names(x), fix.empty.names = TRUE,
215             check.names = !optional,
216             stringsAsFactors = FALSE)
217{
218    ## need to protect names in x.
219    ## truncate any of more than 256 (or cut.names) bytes:
220    new.nms <- !missing(col.names)
221    if(cut.names) {
222	maxL <- if(is.logical(cut.names)) 256L else as.integer(cut.names)
223	if(any(long <- nchar(col.names, "bytes", keepNA = FALSE) > maxL))
224	    col.names[long] <- paste(substr(col.names[long], 1L, maxL - 6L), "...")
225	else cut.names <- FALSE
226    }
227    m <- match(names(formals(data.frame))[-1L],
228	       ## c("row.names", "check.rows", ...., "stringsAsFactors"),
229	       col.names, 0L)
230    if(any.m <- any(m)) col.names[m] <- paste0("..adfl.", col.names[m])
231    if(new.nms || any.m || cut.names) names(x) <- col.names
232    ## data.frame() is picky with its 'row.names':
233    alis <- c(list(check.names = check.names, fix.empty.names = fix.empty.names,
234		   stringsAsFactors = stringsAsFactors),
235	      if(!missing(row.names)) list(row.names = row.names))
236    x <- do.call(data.frame, c(x, alis))
237    if(any.m) names(x) <- sub("^\\.\\.adfl\\.", "", names(x))
238    x
239}
240
241as.data.frame.vector <- function(x, row.names = NULL, optional = FALSE, ...,
242				 nm = deparse1(substitute(x)))
243{
244    force(nm)
245    nrows <- length(x)
246    ## ## row.names -- for now warn about and "forget" illegal row.names
247    ## ##           -- can simplify much (move this *after* the is.null(.) case) once we stop() !
248### FIXME: allow  integer [of full length]
249    if(!(is.null(row.names) || (is.character(row.names) && length(row.names) == nrows))) {
250	warning(gettextf(
251	    "'row.names' is not a character vector of length %d -- omitting it. Will be an error!",
252	    nrows), domain = NA)
253	row.names <- NULL
254    }
255    if(is.null(row.names)) {
256	if (nrows == 0L)
257	    row.names <- character()
258	else if(length(row.names <- names(x)) != nrows || anyDuplicated(row.names))
259	    row.names <- .set_row_names(nrows)
260    }
261    ## else if(length(row.names) != nrows) # same behavior as the 'matrix' method
262    ##     row.names <- .set_row_names(nrows)
263    if(!is.null(names(x))) names(x) <- NULL # remove names as from 2.0.0
264    value <- list(x)
265    if(!optional) names(value) <- nm
266    structure(value, row.names = row.names, class = "data.frame")
267}
268
269as.data.frame.ts <- function(x, ...)
270{
271    if(is.matrix(x))
272	as.data.frame.matrix(x, ...)
273    else
274	as.data.frame.vector(x, ...)
275}
276
277as.data.frame.raw  <- as.data.frame.vector
278as.data.frame.factor  <- as.data.frame.vector
279as.data.frame.ordered <- as.data.frame.vector
280as.data.frame.integer <- as.data.frame.vector
281as.data.frame.logical <- as.data.frame.vector
282as.data.frame.numeric <- as.data.frame.vector
283as.data.frame.complex <- as.data.frame.vector
284
285
286default.stringsAsFactors <- function()
287{
288    val <- getOption("stringsAsFactors")
289    if(is.null(val)) val <- FALSE
290    if(!is.logical(val) || is.na(val) || length(val) != 1L)
291        stop('options("stringsAsFactors") not set to TRUE or FALSE')
292    val
293}
294
295## in case someone passes 'nm'
296as.data.frame.character <-
297    function(x, ..., stringsAsFactors = FALSE)
298{
299    nm <- deparse1(substitute(x))
300    if(stringsAsFactors) x <- factor(x)
301    if(!"nm" %in% ...names())
302        as.data.frame.vector(x, ..., nm = nm)
303    else as.data.frame.vector(x, ...)
304}
305
306as.data.frame.matrix <- function(x, row.names = NULL, optional = FALSE, make.names = TRUE, ...,
307                                 stringsAsFactors = FALSE)
308{
309    d <- dim(x)
310    nrows <- d[[1L]]
311    ncols <- d[[2L]]
312    ic <- seq_len(ncols)
313    dn <- dimnames(x)
314    ## surely it cannot be right to override the supplied row.names?
315    ## changed in 1.8.0
316    if(is.null(row.names)) row.names <- dn[[1L]]
317    collabs <- dn[[2L]]
318    ## These might be NA
319    if(any(empty <- !nzchar(collabs)))
320	collabs[empty] <- paste0("V", ic)[empty]
321    value <- vector("list", ncols)
322    if(mode(x) == "character" && stringsAsFactors) {
323	for(i in ic)
324	    value[[i]] <- as.factor(x[,i])
325    } else {
326	for(i in ic)
327	    value[[i]] <- as.vector(x[,i])
328    }
329    ## Explicitly check for NULL in case nrows==0
330    autoRN <- (is.null(row.names) || length(row.names) != nrows)
331    if(length(collabs) == ncols)
332	names(value) <- collabs
333    else if(!optional)
334	names(value) <- paste0("V", ic)
335    class(value) <- "data.frame"
336    if(autoRN)
337        attr(value, "row.names") <- .set_row_names(nrows)
338    else
339        .rowNamesDF(value, make.names=make.names) <- row.names
340    value
341}
342
343as.data.frame.model.matrix <-
344    function(x, row.names = NULL, optional = FALSE, make.names = TRUE, ...)
345{
346    d <- dim(x)
347    nrows <- d[[1L]]
348    dn <- dimnames(x)
349    row.names <- dn[[1L]]
350    value <- list(x)
351    if(!optional) names(value) <- deparse(substitute(x))[[1L]]
352                                        # FIXME? better:  , nlines=1L  or deparse1(.)
353    class(value) <- "data.frame"
354    if(!is.null(row.names)) {
355	row.names <- as.character(row.names)
356	if(length(row.names) != nrows)
357            stop(sprintf(ngettext(length(row.names),
358                                  "supplied %d row name for %d rows",
359                                  "supplied %d row names for %d rows"),
360                          length(row.names), nrows), domain = NA)
361        .rowNamesDF(value, make.names=make.names) <- row.names
362    }
363    else attr(value, "row.names") <- .set_row_names(nrows)
364    value
365}
366
367as.data.frame.array <- function(x, row.names = NULL, optional = FALSE, ...)
368{
369    d <- dim(x)
370    if(length(d) == 1L) { ## same as as.data.frame.vector, but deparsed here
371	## c(): better than drop() or as.vector() !
372	value <- as.data.frame.vector( c(x), row.names, optional, ...)
373        if(!optional) names(value) <- deparse(substitute(x))[[1L]]
374                                        # FIXME? better:  , nlines=1L  or deparse1(.)
375        value
376    } else if (length(d) == 2L) {
377        ## for explicit "array" class; otherwise *.matrix() is dispatched
378        as.data.frame.matrix(x, row.names, optional, ...)
379    } else {
380        dn <- dimnames(x)
381        dim(x) <- c(d[1L], prod(d[-1L]))
382        if(!is.null(dn)) {
383            if(length(dn[[1L]])) rownames(x) <- dn[[1L]]
384            for(i in 2L:length(d))
385                if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i])
386            colnames(x) <- interaction(expand.grid(dn[-1L]))
387        }
388        as.data.frame.matrix(x, row.names, optional, ...)
389    }
390}
391
392## Allow extraction method to have changed the underlying class,
393## so re-assign the class based on the result.
394`[.AsIs` <- function(x, i, ...) I(NextMethod("["))
395
396
397## NB: this is called relatively often from data.frame() itself, ...
398as.data.frame.AsIs <- function(x, row.names = NULL, optional = FALSE, ...)
399{
400    if(length(dim(x)) == 2L)
401	as.data.frame.model.matrix(x, row.names, optional)
402    else { # as.data.frame.vector without removing names
403        nrows <- length(x)
404        nm <- deparse1(substitute(x))
405        if(is.null(row.names)) {
406            autoRN <- FALSE
407            if (nrows == 0L)
408                row.names <- character()
409            else if(length(row.names <- names(x)) == nrows &&
410                    !anyDuplicated(row.names)) {
411            }
412            else {
413                autoRN <- TRUE
414                row.names <- .set_row_names(nrows)
415            }
416        } else
417            autoRN <- is.integer(row.names) && length(row.names) == 2L &&
418                is.na(rn1 <- row.names[[1L]]) && rn1 < 0
419        value <- list(x)
420        if(!optional) names(value) <- nm
421        class(value) <- "data.frame"
422        ## FIXME -- Need to comment the  'row.names(.) <-'  case
423        ## if(autoRN)
424            attr(value, "row.names") <- row.names
425        ## else
426        ##     row.names(value) <- row.names
427        value
428    }
429
430}
431
432###  This is the real "data.frame".
433###  It does everything by calling the methods presented above.
434
435data.frame <-
436    function(..., row.names = NULL, check.rows = FALSE, check.names = TRUE,
437	     fix.empty.names = TRUE,
438             stringsAsFactors = FALSE)
439{
440    data.row.names <-
441	if(check.rows && is.null(row.names))
442	    function(current, new, i) {
443		if(is.character(current)) new <- as.character(new)
444		if(is.character(new)) current <- as.character(current)
445		if(anyDuplicated(new))
446		    return(current)
447		if(is.null(current))
448		    return(new)
449		if(all(current == new) || all(current == ""))
450		    return(new)
451		stop(gettextf(
452		    "mismatch of row names in arguments of 'data.frame\', item %d", i),
453		    domain = NA)
454	    }
455	else function(current, new, i) {
456	    if(is.null(current)) {
457		if(anyDuplicated(new)) {
458		    warning(gettextf(
459                        "some row.names duplicated: %s --> row.names NOT used",
460                        paste(which(duplicated(new)), collapse=",")),
461                        domain = NA)
462		    current
463		} else new
464	    } else current
465	}
466    object <- as.list(substitute(list(...)))[-1L]
467    mirn <- missing(row.names) # record before possibly changing
468    mrn  <- is.null(row.names) # missing or NULL
469    x <- list(...)
470    n <- length(x)
471    if(n < 1L) {
472        if(!mrn) {
473            if(is.object(row.names) || !is.integer(row.names))
474                row.names <- as.character(row.names)
475            if(anyNA(row.names))
476                stop("row names contain missing values")
477            if(anyDuplicated(row.names))
478                stop(gettextf("duplicate row.names: %s",
479                              paste(unique(row.names[duplicated(row.names)]),
480                                    collapse = ", ")),
481                     domain = NA)
482        } else row.names <- integer()
483	return(structure(list(), names = character(),
484                         row.names = row.names,
485			 class = "data.frame"))
486    }
487    vnames <- names(x)
488    if(length(vnames) != n)
489	vnames <- character(n)
490    no.vn <- !nzchar(vnames)
491    vlist <- vnames <- as.list(vnames)
492    nrows <- ncols <- integer(n)
493    for(i in seq_len(n)) {
494        ## do it this way until all as.data.frame methods have been updated
495	xi <- if(is.character(x[[i]]) || is.list(x[[i]]))
496		  as.data.frame(x[[i]], optional = TRUE,
497				stringsAsFactors = stringsAsFactors)
498	      else as.data.frame(x[[i]], optional = TRUE)
499
500        nrows[i] <- .row_names_info(xi) # signed for now
501	ncols[i] <- length(xi)
502	namesi <- names(xi)
503	if(ncols[i] > 1L) {
504	    if(length(namesi) == 0L) namesi <- seq_len(ncols[i])
505	    vnames[[i]] <- if(no.vn[i]) namesi
506			   else paste(vnames[[i]], namesi, sep=".")
507	} else if(length(namesi)) {
508	    vnames[[i]] <- namesi
509	} else if (fix.empty.names && no.vn[[i]]) {
510	    tmpname <- deparse(object[[i]], nlines = 1L)[1L]
511	    if(startsWith(tmpname, "I(") && endsWith(tmpname, ")")) {
512                ## from 'I(*)', only keep '*':
513		ntmpn <- nchar(tmpname, "c")
514                tmpname <- substr(tmpname, 3L, ntmpn - 1L)
515	    }
516	    vnames[[i]] <- tmpname
517	} ## else vnames[[i]] are not changed
518	if(mirn && nrows[i] > 0L) {
519            rowsi <- attr(xi, "row.names")
520            ## Avoid all-blank names
521            if(any(nzchar(rowsi)))
522                row.names <- data.row.names(row.names, rowsi, i)
523        }
524        nrows[i] <- abs(nrows[i])
525	vlist[[i]] <- xi
526    }
527    nr <- max(nrows)
528    for(i in seq_len(n)[nrows < nr]) {
529	xi <- vlist[[i]]
530	if(nrows[i] > 0L && (nr %% nrows[i] == 0L)) {
531            ## make some attempt to recycle column i
532            xi <- unclass(xi) # avoid data-frame methods
533            fixed <- TRUE
534            for(j in seq_along(xi)) {
535                xi1 <- xi[[j]]
536                if(is.vector(xi1) || is.factor(xi1))
537                    xi[[j]] <- rep(xi1, length.out = nr)
538		else if(is.character(xi1) && inherits(xi1, "AsIs"))
539                    xi[[j]] <- structure(rep(xi1, length.out = nr),
540                                         class = class(xi1))
541                else if(inherits(xi1, "Date") || inherits(xi1, "POSIXct"))
542                    xi[[j]] <- rep(xi1, length.out = nr)
543                else {
544                    fixed <- FALSE
545                    break
546                }
547            }
548            if (fixed) {
549                vlist[[i]] <- xi
550                next
551            }
552        }
553        stop(gettextf("arguments imply differing number of rows: %s",
554                      paste(unique(nrows), collapse = ", ")),
555             domain = NA)
556    }
557    value <- unlist(vlist, recursive=FALSE, use.names=FALSE)
558    ## unlist() drops i-th component if it has 0 columns
559    vnames <- as.character(unlist(vnames[ncols > 0L]))
560    if(fix.empty.names && any(noname <- !nzchar(vnames)))
561	vnames[noname] <- paste0("Var.", seq_along(vnames))[noname]
562    if(check.names) {
563	if(fix.empty.names)
564	    vnames <- make.names(vnames, unique=TRUE)
565	else { ## do not fix ""
566	    nz <- nzchar(vnames)
567	    vnames[nz] <- make.names(vnames[nz], unique=TRUE)
568	}
569    }
570    names(value) <- vnames
571    if(!mrn) { # non-null row.names arg was supplied
572        if(length(row.names) == 1L && nr != 1L) {  # one of the variables
573            if(is.character(row.names))
574                row.names <- match(row.names, vnames, 0L)
575            if(length(row.names) != 1L ||
576               row.names < 1L || row.names > length(vnames))
577                stop("'row.names' should specify one of the variables")
578            i <- row.names
579            row.names <- value[[i]]
580            value <- value[ - i]
581        } else if ( !is.null(row.names) && length(row.names) != nr )
582            stop("row names supplied are of the wrong length")
583    } else if( !is.null(row.names) && length(row.names) != nr ) {
584        warning("row names were found from a short variable and have been discarded")
585        row.names <- NULL
586    }
587    class(value) <- "data.frame"
588    if(is.null(row.names))
589        attr(value, "row.names") <- .set_row_names(nr) #seq_len(nr)
590    else {
591        if(is.object(row.names) || !is.integer(row.names))
592            row.names <- as.character(row.names)
593        if(anyNA(row.names))
594            stop("row names contain missing values")
595        if(anyDuplicated(row.names))
596            stop(gettextf("duplicate row.names: %s",
597                          paste(unique(row.names[duplicated(row.names)]),
598                                collapse = ", ")),
599                 domain = NA)
600        row.names(value) <- row.names
601    }
602    value
603}
604
605
606###  Subsetting and mutation methods
607###  These are a little less general than S
608
609`[.data.frame` <-
610    function(x, i, j, drop = if(missing(i)) TRUE else length(cols) == 1)
611{
612    mdrop <- missing(drop)
613    Narg <- nargs() - !mdrop  # number of arg from x,i,j that were specified
614    has.j <- !missing(j)
615    if(!all(names(sys.call()) %in% c("", "drop"))
616       && !isS4(x)) # at least don't warn for callNextMethod!
617        warning("named arguments other than 'drop' are discouraged")
618
619    if(Narg < 3L) {  # list-like indexing or matrix indexing
620        if(!mdrop) warning("'drop' argument will be ignored")
621	if(missing(i)) return(x)
622	if(is.matrix(i))
623	    return(as.matrix(x)[i])  # desperate measures
624        ## zero-column data frames prior to 2.4.0 had no names.
625        nm <- names(x); if(is.null(nm)) nm <- character()
626        ## if we have NA names, character indexing should always fail
627        ## (for positive index length)
628        if(!is.character(i) && anyNA(nm)) { # less efficient version
629            names(nm) <- names(x) <- seq_along(x)
630            y <- NextMethod("[")
631            cols <- names(y)
632            if(anyNA(cols)) stop("undefined columns selected")
633            cols <- names(y) <- nm[cols]
634        } else {
635            y <- NextMethod("[")
636            cols <- names(y)
637            if(!is.null(cols) && anyNA(cols))
638                stop("undefined columns selected")
639        }
640        ## added in 1.8.0
641        if(anyDuplicated(cols)) names(y) <- make.unique(cols)
642        ## since we have not touched the rows, copy over the raw row.names
643        ## Claimed at one time at least one fewer copies: PR#15274
644        attr(y, "row.names") <- .row_names_info(x, 0L)
645        attr(y, "class") <- oldClass(x)
646        return(y)
647    }
648
649    if(missing(i)) { # df[, j] or df[ , ]
650        ## not quite the same as the 1/2-arg case, as 'drop' is used.
651        if(drop && !has.j && length(x) == 1L) return(.subset2(x, 1L))
652        nm <- names(x); if(is.null(nm)) nm <- character()
653        if(has.j && !is.character(j) && anyNA(nm)) {
654            ## less efficient version
655            names(nm) <- names(x) <- seq_along(x)
656            y <- .subset(x, j)
657            cols <- names(y)
658            if(anyNA(cols)) stop("undefined columns selected")
659            cols <- names(y) <- nm[cols]
660        } else {
661            y <- if(has.j) .subset(x, j) else x
662            cols <- names(y)
663            if(anyNA(cols)) stop("undefined columns selected")
664        }
665        if(drop && length(y) == 1L) return(.subset2(y, 1L))
666        if(anyDuplicated(cols)) names(y) <- make.unique(cols)
667        nrow <- .row_names_info(x, 2L)
668        if(drop && !mdrop && nrow == 1L)
669            return(structure(y, class = NULL, row.names = NULL))
670        else {
671            ## Claimed at one time at least one fewer copies: PR#15274
672            attr(y, "class") <- oldClass(x)
673            attr(y, "row.names") <- .row_names_info(x, 0L)
674            return(y)
675        }
676    }
677
678    ### df[i, j] or df[i , ]
679    ## rewritten for R 2.5.0 to avoid duplicating x.
680    xx <- x
681    cols <- names(xx)  # needed for computation of 'drop' arg
682    ## make a shallow copy
683    x <- vector("list", length(x))
684    ## attributes(x) <- attributes(xx) expands row names
685    x <- .Internal(copyDFattr(xx, x))
686    oldClass(x) <- attr(x, "row.names") <- NULL
687
688    if(has.j) { # df[i, j]
689        nm <- names(x); if(is.null(nm)) nm <- character()
690        if(!is.character(j) && anyNA(nm))
691            names(nm) <- names(x) <- seq_along(x)
692        x <- x[j]
693        cols <- names(x)  # needed for 'drop'
694        if(drop && length(x) == 1L) {
695            ## for consistency with [, <length-1>]
696            if(is.character(i)) {
697                rows <- attr(xx, "row.names")
698                i <- pmatch(i, rows, duplicates.ok = TRUE)
699            }
700            ## need to figure which col was selected:
701            ## cannot use .subset2 directly as that may
702            ## use recursive selection for a logical index.
703            xj <- .subset2(.subset(xx, j), 1L)
704            return(if(length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE])
705        }
706        if(anyNA(cols)) stop("undefined columns selected")
707        ## fix up names if we altered them.
708        if(!is.null(names(nm))) cols <- names(x) <- nm[cols]
709        ## sxx <- match(cols, names(xx)) fails with duplicate names
710        nxx <- structure(seq_along(xx), names=names(xx))
711        sxx <- match(nxx[j], seq_along(xx))
712    } else sxx <- seq_along(x)
713
714    rows <- NULL # placeholder: only create row names when needed
715                 # as this can be expensive.
716    if(is.character(i)) {
717        rows <- attr(xx, "row.names")
718        i <- pmatch(i, rows, duplicates.ok = TRUE)
719    }
720    for(j in seq_along(x)) {
721        xj <- xx[[ sxx[j] ]]
722        ## had drop = drop prior to 1.8.0
723        x[[j]] <- if(length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE]
724    }
725
726    if(drop) {
727	n <- length(x)
728	if(n == 1L) return(x[[1L]]) # drops attributes
729	if(n > 1L) {
730	    xj <- x[[1L]]
731	    nrow <- if(length(dim(xj)) == 2L) dim(xj)[1L] else length(xj)
732            ## for consistency with S: don't drop (to a list)
733            ## if only one row, unless explicitly asked for
734            drop <- !mdrop && nrow == 1L
735	} else drop <- FALSE ## for n == 0
736    }
737
738    if(!drop) { # not else as previous section might reset drop
739        ## row names might have NAs.
740        if(is.null(rows)) rows <- attr(xx, "row.names")
741        rows <- rows[i]
742	if((ina <- anyNA(rows)) | (dup <- anyDuplicated(rows))) {
743	    ## both will coerce integer 'rows' to character:
744	    if (!dup && is.character(rows)) dup <- "NA" %in% rows
745	    if(ina)
746		rows[is.na(rows)] <- "NA"
747	    if(dup)
748		rows <- make.unique(as.character(rows))
749	}
750        ## new in 1.8.0  -- might have duplicate columns
751	if(has.j && anyDuplicated(nm <- names(x)))
752            names(x) <- make.unique(nm)
753        if(is.null(rows)) rows <- attr(xx, "row.names")[i]
754	attr(x, "row.names") <- rows
755	oldClass(x) <- oldClass(xx)
756    }
757    x
758}
759
760`[[.data.frame` <- function(x, ..., exact=TRUE)
761{
762    ## use in-line functions to refer to the 1st and 2nd ... arguments
763    ## explicitly. Also will check for wrong number or empty args
764    na <- nargs() - !missing(exact)
765    if(!all(names(sys.call()) %in% c("", "exact")))
766        warning("named arguments other than 'exact' are discouraged")
767
768    if(na < 3L)
769	(function(x, i, exact)
770	  if(is.matrix(i)) as.matrix(x)[[i]]
771 	  else .subset2(x, i, exact=exact))(x, ..., exact=exact)
772    else {
773        col <- .subset2(x, ..2, exact=exact)
774        i <- if(is.character(..1))
775            pmatch(..1, row.names(x), duplicates.ok = TRUE)
776        else ..1
777        ## we do want to dispatch on methods for a column.
778        ## .subset2(col, i, exact=exact)
779        col[[i, exact = exact]]
780    }
781}
782
783`[<-.data.frame` <- function(x, i, j, value)
784{
785    if(!all(names(sys.call()) %in% c("", "value")))
786        warning("named arguments are discouraged")
787
788    nA <- nargs() # 'value' is never missing, so 3 or 4.
789    if(nA == 4L) { ## df[,] or df[i,] or df[, j] or df[i,j]
790	has.i <- !missing(i)
791	has.j <- !missing(j)
792    }
793    else if(nA == 3L) {
794        ## this collects both df[] and df[ind]
795        if (is.atomic(value) && !is.null(names(value)))
796            names(value) <- NULL
797        if(missing(i) && missing(j)) { # case df[]
798            i <- j <- NULL
799            has.i <- has.j <- FALSE
800            ## added in 1.8.0
801            if(is.null(value)) return(x[logical()])
802        } else { # case df[ind]
803            ## really ambiguous, but follow common use as if list
804            ## except for two column numeric matrix or full-sized logical matrix
805            if(is.numeric(i) && is.matrix(i) && ncol(i) == 2) {
806                # Rewrite i as a logical index
807                index <- rep.int(FALSE, prod(dim(x)))
808                dim(index) <- dim(x)
809                tryCatch(index[i] <- TRUE,
810                         error = function(e) stop(conditionMessage(e), call.=FALSE))
811                # Put values in the right order
812                o <- order(i[,2], i[,1])
813                N <- length(value)
814                if (length(o) %% N != 0L)
815                    warning("number of items to replace is not a multiple of replacement length")
816                if (N < length(o))
817                    value <- rep(value, length.out=length(o))
818                value <- value[o]
819                i <- index
820            }
821            if(is.logical(i) && is.matrix(i) && all(dim(i) == dim(x))) {
822                nreplace <- sum(i, na.rm=TRUE)
823                if(!nreplace) return(x) # nothing to replace
824                ## allow replication of length(value) > 1 in 1.8.0
825                N <- length(value)
826                if(N > 1L && N < nreplace && (nreplace %% N) == 0L)
827                    value <- rep(value, length.out = nreplace)
828                if(N > 1L && (length(value) != nreplace))
829                    stop("'value' is the wrong length")
830                n <- 0L
831                nv <- nrow(x)
832                for(v in seq_len(dim(i)[2L])) {
833                    thisvar <- i[, v, drop = TRUE]
834                    nv <- sum(thisvar, na.rm = TRUE)
835                    if(nv) {
836                        if(is.matrix(x[[v]]))
837                            x[[v]][thisvar, ] <- if(N > 1L) value[n+seq_len(nv)] else value
838                        else
839                            x[[v]][thisvar] <- if(N > 1L) value[n+seq_len(nv)] else value
840                    }
841                    n <- n+nv
842                }
843                return(x)
844            }  # end of logical matrix
845            if(is.matrix(i))
846                stop("unsupported matrix index in replacement")
847            j <- i
848            i <- NULL
849            has.i <- FALSE
850            has.j <- TRUE
851        }
852    }
853    else # nargs() <= 2
854	stop("need 0, 1, or 2 subscripts")
855
856    if ((has.j && !length(j)) ||	# "no", i.e. empty columns specified
857        (has.i && !length(i) && !has.j))# empty rows and no col.   specified
858	return(x)
859
860    cl <- oldClass(x)
861    ## delete class: S3 idiom to avoid any special methods for [[, etc
862    class(x) <- NULL
863    new.cols <- NULL
864    nvars <- length(x)
865    nrows <- .row_names_info(x, 2L)
866    if(has.i && length(i)) { # df[i, ] or df[i, j]
867        rows <- NULL  # indicator that it is not yet set
868        if(anyNA(i))
869            stop("missing values are not allowed in subscripted assignments of data frames")
870	if(char.i <- is.character(i)) {
871            rows <- attr(x, "row.names")
872	    ii <- match(i, rows)
873	    nextra <- sum(new.rows <- is.na(ii))
874	    if(nextra > 0L) {
875		ii[new.rows] <- seq.int(from = nrows + 1L, length.out = nextra)
876		new.rows <- i[new.rows]
877	    }
878	    i <- ii
879	}
880	if(!is.logical(i) &&
881	   (char.i && nextra  ||  all(i >= 0L) && (nn <- max(i)) > nrows)) {
882	    ## expand
883            if(is.null(rows)) rows <- attr(x, "row.names")
884	    if(!char.i) {
885		nrr <- (nrows + 1L):nn
886		if(inherits(value, "data.frame") &&
887		   (dim(value)[1L]) >= length(nrr)) {
888		    new.rows <- attr(value, "row.names")[seq_along(nrr)]
889		    repl <- duplicated(new.rows) | match(new.rows, rows, 0L)
890		    if(any(repl)) new.rows[repl] <- nrr[repl]
891		}
892		else new.rows <- nrr
893	    }
894	    x <- xpdrows.data.frame(x, rows, new.rows)
895	    rows <- attr(x, "row.names")
896	    nrows <- length(rows)
897	}
898	iseq <- seq_len(nrows)[i]
899	if(anyNA(iseq)) stop("non-existent rows not allowed")
900    }
901    else iseq <- NULL
902
903    if(has.j) {
904        if(anyNA(j))
905            stop("missing values are not allowed in subscripted assignments of data frames")
906	if(is.character(j)) {
907            if("" %in% j) stop("column name \"\" cannot match any column")
908	    jseq <- match(j, names(x))
909	    if(anyNA(jseq)) {
910		n <- is.na(jseq)
911		jseq[n] <- nvars + seq_len(sum(n))
912		new.cols <- j[n]
913	    }
914	}
915	else if(is.logical(j) || min(j) < 0L)
916	    jseq <- seq_along(x)[j]
917	else {
918	    jseq <- j
919	    if(max(jseq) > nvars) {
920		new.cols <- paste0("V",
921                                   seq.int(from = nvars + 1L, to = max(jseq)))
922		if(length(new.cols)  != sum(jseq > nvars))
923		    stop("new columns would leave holes after existing columns")
924                ## try to use the names of a list `value'
925                if(is.list(value) && !is.null(vnm <- names(value))) {
926                    p <- length(jseq)
927                    if(length(vnm) < p) vnm <- rep_len(vnm, p)
928                    new.cols <- vnm[jseq > nvars]
929                }
930	    }
931	}
932    }
933    else jseq <- seq_along(x)
934
935    ## empty rows and not (a *new* column as in  d[FALSE, "new"] <- val )  :
936    if(has.i && !length(iseq) && all(1L <= jseq & jseq <= nvars))
937	return(`class<-`(x, cl))
938
939    ## addition in 1.8.0
940    if(anyDuplicated(jseq))
941        stop("duplicate subscripts for columns")
942    n <- length(iseq)
943    if(n == 0L) n <- nrows
944    p <- length(jseq)
945    if (is.null(value)) {
946        value <- list(NULL)
947    }
948    m <- length(value)
949    if(!is.list(value)) {
950        if(p == 1L) {
951            N <- NROW(value)
952            if(N > n)
953                stop(sprintf(ngettext(N,
954                                      "replacement has %d row, data has %d",
955                                      "replacement has %d rows, data has %d"),
956                             N, n), domain = NA)
957            if(N < n && N > 0L)
958                if(n %% N == 0L && length(dim(value)) <= 1L)
959                    value <- rep(value, length.out = n)
960                else
961                    stop(sprintf(ngettext(N,
962                                          "replacement has %d row, data has %d",
963                                          "replacement has %d rows, data has %d"),
964                                 N, nrows), domain = NA)
965            if (!is.null(names(value))) names(value) <- NULL
966            value <- list(value)
967         } else {
968            if(m < n*p && (m == 0L || (n*p) %% m))
969                stop(sprintf(ngettext(m,
970                                      "replacement has %d item, need %d",
971                                      "replacement has %d items, need %d"),
972                             m, n*p), domain = NA)
973            value <- matrix(value, n, p)  ## will recycle
974            ## <FIXME split.matrix>
975            value <- split(c(value), col(value))
976        }
977	dimv <- c(n, p)
978    } else { # a list
979        ## careful, as.data.frame turns things into factors.
980	## value <- as.data.frame(value)
981        value <- unclass(value) # to avoid data frame indexing
982        lens <- vapply(value, NROW, 1L)
983        for(k in seq_along(lens)) {
984            N <- lens[k]
985            if(n != N && length(dim(value[[k]])) == 2L)
986                stop(sprintf(ngettext(N,
987                                      "replacement element %d is a matrix/data frame of %d row, need %d",
988                                      "replacement element %d is a matrix/data frame of %d rows, need %d"),
989                             k, N, n),
990                     domain = NA)
991            if(N > 0L && N < n && n %% N)
992                stop(sprintf(ngettext(N,
993                                      "replacement element %d has %d row, need %d",
994                                      "replacement element %d has %d rows, need %d"),
995                             k, N, n), domain = NA)
996            ## these fixing-ups will not work for matrices
997            if(N > 0L && N < n) value[[k]] <- rep(value[[k]], length.out = n)
998            if(N > n) {
999                warning(sprintf(ngettext(N,
1000                                         "replacement element %d has %d row to replace %d rows",
1001                                         "replacement element %d has %d rows to replace %d rows"),
1002                                k, N, n), domain = NA)
1003                value[[k]] <- value[[k]][seq_len(n)]
1004            }
1005        }
1006	dimv <- c(n, length(value))
1007    }
1008    nrowv <- dimv[1L]
1009    if(nrowv < n && nrowv > 0L) {
1010	if(n %% nrowv == 0L)
1011	    value <- value[rep_len(seq_len(nrowv), n),,drop = FALSE]
1012	else
1013            stop(sprintf(ngettext(nrowv,
1014                                  "%d row in value to replace %d rows",
1015                                  "%d rows in value to replace %d rows"),
1016                         nrowv, n), domain = NA)
1017    }
1018    else if(nrowv > n)
1019        warning(sprintf(ngettext(nrowv,
1020                                 "replacement data has %d row to replace %d rows",
1021                                 "replacement data has %d rows to replace %d rows"),
1022                        nrowv, n), domain = NA)
1023    ncolv <- dimv[2L]
1024    jvseq <- seq_len(p)
1025    if(ncolv < p) jvseq <- rep_len(seq_len(ncolv), p)
1026    else if(p != 0L && ncolv > p) {
1027        warning(sprintf(ngettext(ncolv,
1028                                 "provided %d variable to replace %d variables",
1029                                 "provided %d variables to replace %d variables"),
1030                        ncolv, p), domain = NA)
1031        new.cols <- new.cols[seq_len(p)]
1032    }
1033    if(length(new.cols)) {
1034        ## extend and name now, as assignment of NULL may delete cols later.
1035        nm <- names(x)
1036        rows <- .row_names_info(x, 0L)
1037        a <- attributes(x); a["names"] <- NULL
1038        x <- c(x, vector("list", length(new.cols)))
1039        attributes(x) <- a
1040        names(x) <- c(nm, new.cols)
1041        attr(x, "row.names") <- rows
1042    }
1043    if(has.i)
1044	for(jjj in seq_len(p)) {
1045	    jj <- jseq[jjj]
1046	    vjj <- value[[ jvseq[[jjj]] ]]
1047            if(jj <= nvars) {
1048                ## if a column exists, preserve its attributes
1049                if(length(dim(x[[jj]])) != 2L)
1050                     x[[jj]][iseq  ] <- vjj
1051                else x[[jj]][iseq, ] <- vjj
1052            } else {
1053                ## try to make a new column match in length: may be an error
1054                x[[jj]] <- vjj[FALSE]
1055                if(length(dim(vjj)) == 2L) {
1056                    length(x[[jj]]) <- nrows * ncol(vjj)
1057                    dim(x[[jj]])  <- c(nrows,  ncol(vjj))
1058                    x[[jj]][iseq, ] <- vjj
1059                } else {
1060                    length(x[[jj]]) <- nrows
1061                    x[[jj]][iseq] <- vjj
1062                }
1063            }
1064	}
1065    else if(p > 0L)
1066      for(jjj in p:1L) { # we might delete columns with NULL
1067        ## ... and for that reason, we'd better ensure that jseq is increasing!
1068        o <- order(jseq)
1069        jseq <- jseq[o]
1070        jvseq <- jvseq[o]
1071
1072        jj <- jseq[jjj]
1073        v <- value[[ jvseq[[jjj]] ]]
1074        ## This is consistent with the have.i case rather than with
1075        ## [[<- and $<- (which throw an error).  But both are plausible.
1076        if (!is.null(v) && nrows > 0L && !length(v)) length(v) <- nrows
1077	x[[jj]] <- v
1078        if (!is.null(v) && is.atomic(x[[jj]]) && !is.null(names(x[[jj]])))
1079            names(x[[jj]]) <- NULL
1080    }
1081    if(length(new.cols) > 0L) {
1082        new.cols <- names(x) # we might delete columns with NULL
1083        ## added in 1.8.0
1084        if(anyDuplicated(new.cols)) names(x) <- make.unique(new.cols)
1085    }
1086    class(x) <- cl
1087    x
1088}
1089
1090`[[<-.data.frame` <- function(x, i, j, value)
1091{
1092    if(!all(names(sys.call()) %in% c("", "value")))
1093        warning("named arguments are discouraged")
1094
1095    cl <- oldClass(x)
1096    ## delete class: Version 3 idiom
1097    ## to avoid any special methods for [[<-
1098    class(x) <- NULL
1099    nrows <- .row_names_info(x, 2L)
1100    if(is.atomic(value) && !is.null(names(value))) names(value) <- NULL
1101    if(nargs() < 4L) {
1102	## really ambiguous, but follow common use as if list
1103        nc <- length(x)
1104	if(!is.null(value)) {
1105            N <- NROW(value)
1106            if(N > nrows)
1107                stop(sprintf(ngettext(N,
1108                                      "replacement has %d row, data has %d",
1109                                      "replacement has %d rows, data has %d"),
1110                             N, nrows), domain = NA)
1111            if(N < nrows)
1112                if(N > 0L && (nrows %% N == 0L) && length(dim(value)) <= 1L)
1113                    value <- rep(value, length.out = nrows)
1114                else
1115                    stop(sprintf(ngettext(N,
1116                                          "replacement has %d row, data has %d",
1117                                          "replacement has %d rows, data has %d"),
1118                                 N, nrows), domain = NA)
1119	}
1120	x[[i]] <- value
1121        ## added in 1.8.0 -- make sure there is a name
1122        if(length(x) > nc) {
1123            nc <- length(x)
1124            if(names(x)[nc] == "") names(x)[nc] <- paste0("V", nc)
1125            names(x) <- make.unique(names(x))
1126        }
1127	class(x) <- cl
1128	return(x)
1129    }
1130    if(missing(i) || missing(j))
1131	stop("only valid calls are x[[j]] <- value or x[[i,j]] <- value")
1132    rows <- attr(x, "row.names")
1133    nvars <- length(x)
1134    if(n <- is.character(i)) {
1135	ii <- match(i, rows)
1136	n <- sum(new.rows <- is.na(ii))
1137	if(n > 0L) {
1138	    ii[new.rows] <- seq.int(from = nrows + 1L, length.out = n)
1139	    new.rows <- i[new.rows]
1140	}
1141	i <- ii
1142    }
1143    if(all(i >= 0L) && (nn <- max(i)) > nrows) {
1144	## expand
1145	if(n == 0L) {
1146	    nrr <- (nrows + 1L):nn
1147	    if(inherits(value, "data.frame") &&
1148	       (dim(value)[1L]) >= length(nrr)) {
1149		new.rows <- attr(value, "row.names")[seq_len(nrr)]
1150		repl <- duplicated(new.rows) | match(new.rows, rows, 0L)
1151		if(any(repl)) new.rows[repl] <- nrr[repl]
1152	    }
1153	    else new.rows <- nrr
1154	}
1155	x <- xpdrows.data.frame(x, rows, new.rows)
1156	rows <- attr(x, "row.names")
1157	nrows <- length(rows)
1158    }
1159
1160    ## FIXME: this is wasteful and probably unnecessary
1161    iseq <- seq_len(nrows)[i]
1162    if(anyNA(iseq))
1163	stop("non-existent rows not allowed")
1164
1165    if(is.character(j)) {
1166        if("" %in% j) stop("column name \"\" cannot match any column")
1167	jseq <- match(j, names(x))
1168	if(anyNA(jseq))
1169            stop(gettextf("replacing element in non-existent column: %s",
1170                          j[is.na(jseq)]), domain = NA)
1171    }
1172    else if(is.logical(j) || min(j) < 0L)
1173	jseq <- seq_along(x)[j]
1174    else {
1175	jseq <- j
1176	if(max(jseq) > nvars)
1177            stop(gettextf("replacing element in non-existent column: %s",
1178                          jseq[jseq > nvars]), domain = NA)
1179    }
1180    if(length(iseq) > 1L || length(jseq) > 1L)
1181	stop("only a single element should be replaced")
1182    x[[jseq]][[iseq]] <- value
1183    class(x) <- cl
1184    x
1185}
1186
1187## added in 1.8.0
1188`$<-.data.frame` <- function(x, name, value)
1189{
1190    cl <- oldClass(x)
1191    ## delete class: Version 3 idiom
1192    ## to avoid any special methods for [[<-
1193    ## This forces a copy, but we are going to need one anyway
1194    ## and NAMED=1 prevents any further copying.
1195    class(x) <- NULL
1196    nrows <- .row_names_info(x, 2L)
1197    if(!is.null(value)) {
1198        N <- NROW(value)
1199        if(N > nrows)
1200            stop(sprintf(ngettext(N,
1201                                  "replacement has %d row, data has %d",
1202                                  "replacement has %d rows, data has %d"),
1203                         N, nrows), domain = NA)
1204        if (N < nrows)
1205            if (N > 0L && (nrows %% N == 0L) && length(dim(value)) <= 1L)
1206                value <- rep(value, length.out = nrows)
1207            else
1208                stop(sprintf(ngettext(N,
1209                                      "replacement has %d row, data has %d",
1210                                      "replacement has %d rows, data has %d"),
1211                             N, nrows), domain = NA)
1212        if(is.atomic(value) && !is.null(names(value))) names(value) <- NULL
1213    }
1214    x[[name]] <- value
1215    class(x) <- cl
1216    return(x)
1217}
1218
1219
1220xpdrows.data.frame <- function(x, old.rows, new.rows)
1221{
1222    nc <- length(x)
1223    nro <- length(old.rows)
1224    nrn <- length(new.rows)
1225    nr <- nro + nrn
1226    for (i in seq_len(nc)) {
1227	y <- x[[i]]
1228	dy <- dim(y)
1229	cy <- oldClass(y)
1230	class(y) <- NULL
1231	if (length(dy) == 2L) {
1232	    dny <- dimnames(y)
1233	    if (length(dny[[1L]]) > 0L)
1234		dny[[1L]] <- c(dny[[1L]], new.rows)
1235	    z <- array(y[1L], dim = c(nr, nc), dimnames = dny)
1236	    z[seq_len(nro), ] <- y
1237	    class(z) <- cy
1238	    x[[i]] <- z
1239	}
1240	else {
1241	    ay <- attributes(y)
1242	    if (length(names(y)) > 0L)
1243		ay$names <- c(ay$names, new.rows)
1244	    length(y) <- nr
1245	    attributes(y) <- ay
1246	    class(y) <- cy
1247	    x[[i]] <- y
1248	}
1249    }
1250    nm <- c(old.rows, new.rows)
1251    if (any(duplicated(nm))) nm <- make.unique(as.character(nm))
1252    attr(x, "row.names") <- nm
1253    x
1254}
1255
1256
1257### Here are the methods for rbind and cbind.
1258
1259cbind.data.frame <- function(..., deparse.level = 1)
1260    data.frame(..., check.names = FALSE)
1261
1262rbind.data.frame <- function(..., deparse.level = 1, make.row.names = TRUE,
1263                             stringsAsFactors = FALSE,
1264                             factor.exclude = TRUE)
1265{
1266    match.names <- function(clabs, nmi)
1267    {
1268	if(identical(clabs, nmi)) NULL
1269	else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
1270            ## we need 1-1 matches here
1271	    m <- pmatch(nmi, clabs, 0L)
1272            if(any(m == 0L))
1273                stop("names do not match previous names")
1274            m
1275	} else stop("names do not match previous names")
1276    }
1277    allargs <- list(...)
1278    allargs <- allargs[lengths(allargs) > 0L]
1279    if(length(allargs)) {
1280        ## drop any zero-row data frames, as they may not have proper column
1281        ## types (e.g. NULL).
1282        nr <- vapply(allargs, function(x)
1283                     if(is.data.frame(x)) .row_names_info(x, 2L)
1284                     else if(is.list(x)) length(x[[1L]])
1285					# mismatched lists are checked later
1286                     else length(x), 1L)
1287	if(any(n0 <- nr == 0L)) {
1288	    if(all(n0)) return(allargs[[1L]]) # pretty arbitrary
1289	    allargs <- allargs[!n0]
1290	}
1291    }
1292    n <- length(allargs)
1293    if(n == 0L)
1294	return(list2DF())
1295    nms <- names(allargs)
1296    if(is.null(nms))
1297	nms <- character(n)
1298    cl <- NULL
1299    perm <- rows <- vector("list", n)
1300    if(make.row.names) {
1301	rlabs <- rows
1302	autoRnms <- TRUE # result with 1:nrow(.) row names? [efficiency!]
1303	Make.row.names <- function(nmi, ri, ni, nrow)
1304	{
1305	    if(nzchar(nmi)) {
1306		if(autoRnms) autoRnms <<- FALSE
1307		if(ni == 0L) character()  # PR#8506
1308		else if(ni > 1L) paste(nmi, ri, sep = ".")
1309		else nmi
1310	    }
1311	    else if(autoRnms && nrow > 0L && identical(ri, seq_len(ni)))
1312		as.integer(seq.int(from = nrow + 1L, length.out = ni))
1313	    else {
1314		if(autoRnms && (nrow > 0L || !identical(ri, seq_len(ni))))
1315		    autoRnms <<- FALSE
1316		ri
1317	    }
1318	}
1319    }
1320    smartX <- isTRUE(factor.exclude)
1321
1322    ## check the arguments, develop row and column labels
1323    nrow <- 0L
1324    value <- clabs <- NULL
1325    all.levs <- list()
1326    for(i in seq_len(n)) { ## check and treat arg [[ i ]]  -- part 1
1327	xi <- allargs[[i]]
1328	nmi <- nms[i]
1329        ## coerce matrix to data frame
1330        if(is.matrix(xi)) allargs[[i]] <- xi <-
1331            as.data.frame(xi, stringsAsFactors = stringsAsFactors)
1332	if(inherits(xi, "data.frame")) {
1333	    if(is.null(cl))
1334		cl <- oldClass(xi)
1335	    ri <- attr(xi, "row.names")
1336	    ni <- length(ri)
1337	    if(is.null(clabs)) ## first time
1338		clabs <- names(xi)
1339	    else {
1340                if(length(xi) != length(clabs))
1341                    stop("numbers of columns of arguments do not match")
1342		pi <- match.names(clabs, names(xi))
1343		if( !is.null(pi) ) perm[[i]] <- pi
1344	    }
1345	    rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
1346	    if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
1347	    nrow <- nrow + ni
1348	    if(is.null(value)) { ## first time ==> setup once:
1349		value <- unclass(xi)
1350		nvar <- length(value)
1351		all.levs <- vector("list", nvar)
1352		has.dim <- facCol <- ordCol <- logical(nvar)
1353		if(smartX) NA.lev <- ordCol
1354		for(j in seq_len(nvar)) {
1355		    xj <- value[[j]]
1356                    facCol[j] <- fac <-
1357                        if(!is.null(lj <- levels(xj))) {
1358                            all.levs[[j]] <- lj
1359                            TRUE # turn categories into factors
1360                        } else
1361                            is.factor(xj)
1362		    if(fac) {
1363			ordCol[j] <- is.ordered(xj)
1364			if(smartX && !NA.lev[j])
1365			    NA.lev[j] <- anyNA(lj)
1366		    }
1367		    has.dim[j] <- length(dim(xj)) == 2L
1368		}
1369	    }
1370	    else for(j in seq_len(nvar)) {
1371                xij <- xi[[j]]
1372                if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
1373                if(facCol[jj]) {
1374                    if(length(lij <- levels(xij))) {
1375                        all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
1376			if(ordCol[jj])
1377			    ordCol[jj] <- is.ordered(xij)
1378			if(smartX && !NA.lev[jj])
1379			    NA.lev[jj] <- anyNA(lij)
1380                    } else if(is.character(xij))
1381                        all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
1382                }
1383            }
1384	} ## end{data.frame}
1385	else if(is.list(xi)) {
1386	    ni <- range(lengths(xi))
1387	    if(ni[1L] == ni[2L])
1388		ni <- ni[1L]
1389	    else stop("invalid list argument: all variables should have the same length")
1390            ri <- seq_len(ni)
1391	    rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
1392	    if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
1393	    nrow <- nrow + ni
1394	    if(length(nmi <- names(xi)) > 0L) {
1395		if(is.null(clabs))
1396		    clabs <- nmi
1397		else {
1398                    if(length(xi) != length(clabs))
1399                        stop("numbers of columns of arguments do not match")
1400		    pi <- match.names(clabs, nmi)
1401		    if( !is.null(pi) ) perm[[i]] <- pi
1402		}
1403	    }
1404	}
1405	else if(length(xi)) { # 1 new row
1406	    rows[[i]] <- nrow <- nrow + 1L
1407            if(make.row.names)
1408		rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
1409	}
1410    } # for(i .)
1411
1412    nvar <- length(clabs)
1413    if(nvar == 0L)
1414	nvar <- max(lengths(allargs)) # only vector args
1415    if(nvar == 0L)
1416	return(list2DF())
1417    pseq <- seq_len(nvar)
1418    if(is.null(value)) { # this happens if there has been no data frame
1419	value <- list()
1420	value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
1421        all.levs <- vector("list", nvar)
1422	has.dim <- facCol <- ordCol <- logical(nvar)
1423	if(smartX) NA.lev <- ordCol
1424    }
1425    names(value) <- clabs
1426    for(j in pseq)
1427	if(length(lij <- all.levs[[j]]))
1428            value[[j]] <-
1429		factor(as.vector(value[[j]]), levels = lij,
1430		       exclude = if(smartX) {
1431				     if(!NA.lev[j]) NA # else NULL
1432				 } else factor.exclude,
1433		       ordered = ordCol[j])
1434
1435    if(any(has.dim)) { # some col's are matrices or d.frame's
1436        jdim <- pseq[has.dim]
1437        if(!all(df <- vapply(jdim, function(j) inherits(value[[j]],"data.frame"), NA))) {
1438            ## Ensure matrix columns can be filled in  for(i ...) below
1439            rmax <- max(unlist(rows))
1440            for(j in jdim[!df]) {
1441		dn <- dimnames(vj <- value[[j]])
1442		rn <- dn[[1L]]
1443		if(length(rn) > 0L) length(rn) <- rmax
1444		pj <- dim(vj)[2L]
1445		length(vj) <- rmax * pj
1446		value[[j]] <- array(vj, c(rmax, pj), list(rn, dn[[2L]]))
1447	    }
1448        }
1449    }
1450
1451    for(i in seq_len(n)) { ## add arg [[i]] to result
1452	xi <- unclass(allargs[[i]])
1453	if(!is.list(xi))
1454	    if(length(xi) != nvar)
1455		xi <- rep(xi, length.out = nvar)
1456	ri <- rows[[i]]
1457	pi <- perm[[i]]
1458	if(is.null(pi)) pi <- pseq
1459	for(j in pseq) {
1460	    jj <- pi[j]
1461            xij <- xi[[j]]
1462	    if(has.dim[jj]) {
1463		value[[jj]][ri,	 ] <- xij
1464                ## copy rownames
1465                if(!is.null(r <- rownames(xij)) &&
1466                   !(inherits(xij, "data.frame") &&
1467                     .row_names_info(xij) <= 0))
1468                    rownames(value[[jj]])[ri] <- r
1469	    } else {
1470                ## coerce factors to vectors, in case lhs is character or
1471                ## level set has changed
1472                value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
1473                ## copy names if any
1474                if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
1475            }
1476	}
1477    }
1478    rlabs <- if(make.row.names && !autoRnms) {
1479		 rlabs <- unlist(rlabs)
1480		 if(anyDuplicated(rlabs))
1481		     make.unique(as.character(rlabs), sep = "")
1482		 else
1483		     rlabs
1484	     } # else NULL
1485    if(is.null(cl)) {
1486	as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
1487		      stringsAsFactors = stringsAsFactors)
1488    } else {
1489	structure(value, class = cl,
1490		  row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
1491    }
1492}
1493
1494
1495### coercion and print methods
1496
1497print.data.frame <-
1498    function(x, ..., digits = NULL, quote = FALSE, right = TRUE,
1499	     row.names = TRUE, max = NULL)
1500{
1501    n <- length(row.names(x))
1502    if(length(x) == 0L) {
1503	cat(sprintf(ngettext(n, "data frame with 0 columns and %d row",
1504			     "data frame with 0 columns and %d rows"),
1505		    n), "\n", sep = "")
1506    } else if(n == 0L) {
1507        ## FIXME: header format is inconsistent here
1508	print.default(names(x), quote = FALSE)
1509	cat(gettext("<0 rows> (or 0-length row.names)\n"))
1510    } else {
1511	if(is.null(max)) max <- getOption("max.print", 99999L)
1512        if(!is.finite(max)) stop("invalid 'max' / getOption(\"max.print\"): ", max)
1513	## format.<*>() : avoiding picking up e.g. format.AsIs
1514	omit <- (n0 <- max %/% length(x)) < n
1515	m <- as.matrix(
1516	    format.data.frame(if(omit) x[seq_len(n0), , drop=FALSE] else x,
1517			      digits = digits, na.encode = FALSE))
1518	if(!isTRUE(row.names))
1519	    dimnames(m)[[1L]] <-
1520		if(isFALSE(row.names)) rep.int("", if(omit) n0 else n)
1521		else row.names
1522	print(m, ..., quote = quote, right = right, max = max)
1523	if(omit)
1524	    cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
1525		n - n0, "rows ]\n")
1526    }
1527    invisible(x)
1528}
1529
1530as.matrix.data.frame <- function (x, rownames.force = NA, ...)
1531{
1532    dm <- dim(x)
1533    rn <- if(rownames.force %in% FALSE) NULL
1534	  else if(rownames.force %in% TRUE || .row_names_info(x) > 0L)
1535              row.names(x) # else NULL
1536    dn <- list(rn, names(x))
1537    if(any(dm == 0L))
1538	return(array(NA, dim = dm, dimnames = dn))
1539    p <- dm[2L] # >= 1
1540    pseq <- seq_len(p)
1541    n <- dm[1L]
1542    X <- unclass(x) # will contain the result;
1543    ## the "big question" is if we return a numeric or a character matrix
1544    non.numeric <- non.atomic <- FALSE
1545    all.logical <- TRUE
1546    for (j in pseq) {
1547	xj <- X[[j]]
1548	if(inherits(xj, "data.frame"))# && ncol(xj) > 1L)
1549	    X[[j]] <- xj <- as.matrix(xj)
1550        j.logic <- is.logical(xj)
1551        if(all.logical && !j.logic) all.logical <- FALSE
1552	if(length(levels(xj)) > 0L || !(j.logic || is.numeric(xj) || is.complex(xj))
1553	   || (!is.null(cl <- attr(xj, "class")) && # numeric classed objects to format:
1554	       any(cl %in% c("Date", "POSIXct", "POSIXlt"))))
1555	    non.numeric <- TRUE
1556	if(!is.atomic(xj) && !inherits(xj, "POSIXlt"))
1557	    non.atomic <- TRUE
1558    }
1559    if(non.atomic) {
1560	for (j in pseq) {
1561	    xj <- X[[j]]
1562	    if(!is.recursive(xj))
1563		X[[j]] <- as.list(as.vector(xj))
1564	}
1565    } else if(all.logical) {
1566        ## do nothing for logical columns if a logical matrix will result.
1567    } else if(non.numeric) {
1568	for (j in pseq) {
1569	    if (is.character(X[[j]]))
1570		next
1571	    else if(is.logical(xj <- X[[j]]))
1572		xj <- as.character(xj) # not format(), takes care of NAs too
1573	    else {
1574		miss <- is.na(xj)
1575		xj <- if(length(levels(xj))) as.vector(xj) else format(xj)
1576		is.na(xj) <- miss
1577	    }
1578            X[[j]] <- xj
1579	}
1580    }
1581    ## These coercions could have changed the number of columns
1582    ## (e.g. class "Surv" coerced to character),
1583    ## so only now can we compute collabs.
1584    collabs <- as.list(dn[[2L]])
1585    for (j in pseq) {
1586        xj <- X[[j]]
1587        dj <- dim(xj)
1588        if(length(dj) == 2L && dj[2L] > 0L) { # matrix with > 0 col
1589            if(!length(dnj <- colnames(xj))) dnj <- seq_len(dj[2L])
1590            collabs[[j]] <-
1591                if(length(collabs)) {
1592                    if(dj[2L] > 1L)
1593                        paste(collabs[[j]], dnj, sep = ".")
1594                    else if(is.character(collabs[[j]])) collabs[[j]]
1595                    else dnj
1596                }
1597                else dnj
1598        }
1599    }
1600    nc <- vapply(X, NCOL, numeric(1), USE.NAMES=FALSE)
1601    X <- unlist(X, recursive = FALSE, use.names = FALSE)
1602    dim(X) <- c(n, length(X)/n)
1603    dimnames(X) <- list(dn[[1L]], unlist(collabs[nc > 0], use.names = FALSE))
1604    X
1605}
1606
1607Math.data.frame <- function (x, ...)
1608{
1609    mode.ok <- vapply(x, function(x)
1610        is.numeric(x) || is.logical(x) || is.complex(x), NA)
1611    if (all(mode.ok)) {
1612	x[] <- lapply(X = x, FUN = .Generic, ...)
1613	return(x)
1614    } else {
1615	vnames <- names(x)
1616	if (is.null(vnames)) vnames <- seq_along(x)
1617	stop("non-numeric-alike variable(s) in data frame: ",
1618	     paste(vnames[!mode.ok], collapse = ", "))
1619    }
1620}
1621
1622Ops.data.frame <- function(e1, e2 = NULL)
1623{
1624    unary <- nargs() == 1L
1625    lclass <- nzchar(.Method[1L])
1626    rclass <- !unary && (nzchar(.Method[2L]))
1627    value <- list()
1628    rn <- NULL
1629    ## set up call as op(left, right)
1630    ## These are used, despite
1631    ## _R_CHECK_CODETOOLS_PROFILE_="suppressLocalUnused=FALSE"
1632    FUN <- get(.Generic, envir = parent.frame(), mode = "function")
1633    f <- if (unary) quote(FUN(left)) else quote(FUN(left, right))
1634    lscalar <- rscalar <- FALSE
1635    if(lclass && rclass) {
1636        nr <- .row_names_info(e1, 2L)
1637	if(.row_names_info(e1) > 0L) rn <- attr(e1, "row.names")
1638	cn <- names(e1)
1639	if(any(dim(e2) != dim(e1)))
1640	    stop(gettextf("%s only defined for equally-sized data frames",
1641                          sQuote(.Generic)), domain = NA)
1642    } else if(lclass) {
1643	## e2 is not a data frame, but e1 is.
1644        nr <- .row_names_info(e1, 2L)
1645	if(.row_names_info(e1) > 0L) rn <- attr(e1, "row.names")
1646	cn <- names(e1)
1647	rscalar <- length(e2) <= 1L # e2 might be null
1648	if(is.list(e2)) {
1649	    if(rscalar) e2 <- e2[[1L]]
1650	    else if(length(e2) != ncol(e1))
1651		stop(gettextf("list of length %d not meaningful", length(e2)),
1652                     domain = NA)
1653	} else {
1654	    if(!rscalar)
1655		e2 <- split(rep_len(as.vector(e2), prod(dim(e1))),
1656			    rep.int(seq_len(ncol(e1)),
1657                                    rep.int(nrow(e1), ncol(e1))))
1658	}
1659    } else {
1660	## e1 is not a data frame, but e2 is.
1661        nr <- .row_names_info(e2, 2L)
1662	if(.row_names_info(e2) > 0L) rn <- attr(e2, "row.names")
1663	cn <- names(e2)
1664	lscalar <- length(e1) <= 1L
1665	if(is.list(e1)) {
1666	    if(lscalar) e1 <- e1[[1L]]
1667	    else if(length(e1) != ncol(e2))
1668		stop(gettextf("list of length %d not meaningful", length(e1)),
1669                     domain = NA)
1670	} else {
1671	    if(!lscalar)
1672		e1 <- split(rep_len(as.vector(e1), prod(dim(e2))),
1673			    rep.int(seq_len(ncol(e2)),
1674                                    rep.int(nrow(e2), ncol(e2))))
1675	}
1676    }
1677    for(j in seq_along(cn)) {
1678	left  <- if(!lscalar) e1[[j]] else e1
1679	right <- if(!rscalar) e2[[j]] else e2
1680	value[[j]] <- eval(f)
1681    }
1682    if(.Generic %in% c("+","-","*","^","%%","%/%","/")) {## == 'Arith'
1683	if(length(value)) {
1684	    names(value) <- cn
1685	    data.frame(value, row.names = rn, check.names = FALSE)
1686	} else
1687	    data.frame(       row.names = rn, check.names = FALSE)
1688    }
1689    else { ## 'Logic' ("&","|")  and  'Compare' ("==",">","<","!=","<=",">=") :
1690	value <- unlist(value, recursive = FALSE, use.names = FALSE)
1691	if(!length(value))
1692	    matrix(logical(), nrow = nr, ncol = length(cn), dimnames = list(rn,cn))
1693	else # nrow + possibly recycled value determine dim:
1694	    matrix(value, nrow = nr, dimnames = list(rn,cn))
1695    }
1696}
1697
1698Summary.data.frame <- function(..., na.rm)
1699{
1700    args <- list(...)
1701    args <- lapply(args, function(x) {
1702        x <- as.matrix(x)
1703        if(!is.numeric(x) && !is.logical(x) && !is.complex(x))
1704            stop("only defined on a data frame with all numeric-alike variables")
1705        x
1706    })
1707    do.call(.Generic, c(args, na.rm=na.rm))
1708}
1709
1710xtfrm.data.frame <- function(x) {
1711    if(tolower(Sys.getenv("_R_STOP_ON_XTFRM_DATA_FRAME_")) %in%
1712       c("1", "yes", "true"))
1713        stop("cannot xtfrm data frames")
1714    else {
1715        warning("cannot xtfrm data frames")
1716        NextMethod("xtfrm")
1717    }
1718}
1719
1720list2DF <-
1721function(x = list(), nrow = NULL)
1722{
1723    stopifnot(is.list(x), is.null(nrow) || nrow >= 0L)
1724    if(n <- length(x)) {
1725        if(is.null(nrow))
1726            nrow <- max(lengths(x), 0L)
1727        x <- lapply(x, rep_len, nrow)
1728    } else {
1729        if(is.null(nrow))
1730            nrow <- 0L
1731    }
1732    if(is.null(names(x)))
1733        names(x) <- character(n)
1734    class(x) <- "data.frame"
1735    attr(x, "row.names") <- .set_row_names(nrow)
1736    x
1737}
1738