1# File src/library/base/R/dataframe.R 2# Part of the R package, https://www.R-project.org 3# 4# This program is free software; you can redistribute it and/or modify 5# it under the terms of the GNU General Public License as published by 6# the Free Software Foundation; either version 2 of the License, or 7# (at your option) any later version. 8# 9# This program is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# GNU General Public License for more details. 13# 14# A copy of the GNU General Public License is available at 15# https://www.R-project.org/Licenses/ 16 17# Statlib code by John Chambers, Bell Labs, 1994 18# Changes Copyright (C) 1998-2021 The R Core Team 19 20 21## As from R 2.4.0, row.names can be either character or integer. 22## row.names() will always return character. 23## attr(, "row.names") will return either character or integer. 24## 25## Do not assume that the internal representation is either, since 26## 1L:n is stored as the integer vector c(NA, n) to save space (and 27## the C-level code to get/set the attribute makes the appropriate 28## translations. 29## 30## As from 2.5.0 c(NA, n > 0) indicates deliberately assigned row names, 31## and c(NA, n < 0) automatic row names. 32 33## We cannot allow long vectors as elements until we can handle 34## duplication of row names. 35 36.row_names_info <- function(x, type = 1L) 37 .Internal(shortRowNames(x, type)) 38 39row.names <- function(x) UseMethod("row.names") 40row.names.data.frame <- function(x) as.character(attr(x, "row.names")) 41row.names.default <- function(x) if(!is.null(dim(x))) rownames(x)# else NULL 42 43.set_row_names <- function(n) 44 if(n > 0) c(NA_integer_, -n) else integer() 45 46 47##_H Hack around the fact that other packages fail with a newly improved `row.names<-`: 48##_H 49##_H `row.names<-` <- function(x, make.names = FALSE, value) UseMethod("row.names<-") 50`row.names<-` <- function(x, value) UseMethod("row.names<-") 51 52##_H `row.names<-.data.frame` <- 53`.rowNamesDF<-` <- function(x, make.names = FALSE, value) 54{ 55 if (!is.data.frame(x)) x <- as.data.frame(x) 56 n <- .row_names_info(x, 2L) 57 if(is.null(value)) { # set automatic row.names 58 attr(x, "row.names") <- .set_row_names(n) 59 return(x) 60 } 61 ## do this here, as e.g. POSIXlt changes length when coerced. 62 if( is.object(value) || !is.integer(value) ) 63 value <- as.character(value) 64 if(n == 0L) { 65 ## we have to be careful here. This could be a 66 ## 0-row data frame or an invalid one being constructed. 67 if(!is.null(attr(x, "row.names")) && length(value) > 0L) 68 stop("invalid 'row.names' length") 69 } 70 else if (length(value) != n) { 71 if(isFALSE(make.names)) stop("invalid 'row.names' length") 72 else if(is.na(make.names)) { # automatic row.names 73 attr(x, "row.names") <- .set_row_names(n) 74 return(x) 75 } 76 else if(!isTRUE(make.names)) stop("invalid 'make.names'") 77 ## else make.names = TRUE: amend 'value' to correct ones: 78 else if((nv <- length(value)) < n) 79 value <- c(value, rep_len(value[nv], n-nv)) 80 else # length(value) > n 81 value <- value[seq_len(n)] 82 } 83 if (anyDuplicated(value)) { 84 if(isFALSE(make.names)) { 85 nonuniq <- sort(unique(value[duplicated(value)])) 86 ## warning + stop ?? FIXME: s/warning/stop/ and drop (2nd) stop ?? 87 warning(ngettext(length(nonuniq), 88 sprintf("non-unique value when setting 'row.names': %s", 89 sQuote(nonuniq[1L])), 90 sprintf("non-unique values when setting 'row.names': %s", 91 paste(sQuote(nonuniq), collapse = ", "))), 92 domain = NA, call. = FALSE) 93 stop("duplicate 'row.names' are not allowed") 94 } 95 else if(is.na(make.names)) { # automatic row.names 96 value <- .set_row_names( # find nrow(.) in case 'n' is not usable: 97 if(n == 0L && is.null(.row_names_info(x, 0L)) && length(x) > 0L) 98 length(x[[1L]]) 99 else n) 100 } 101 else if(!isTRUE(make.names)) stop("invalid 'make.names'") 102 else # make.names = TRUE: amend 'value' to correct ones: 103 value <- make.names(value, unique=TRUE) 104 ## NB: 'value' is now guaranteed to have no NA's ==> can use 'else if' : 105 } 106 else if (anyNA(value)) { 107 if(isFALSE(make.names)) 108 stop("missing values in 'row.names' are not allowed") 109 if(is.na(make.names)) # automatic row.names 110 value <- .set_row_names(n) 111 else if(!isTRUE(make.names)) stop("invalid 'make.names'") 112 else # make.names = TRUE: amend 'value' to correct ones: 113 value <- make.names(value, unique=TRUE) 114 } 115 attr(x, "row.names") <- value 116 x 117} 118 119`row.names<-.data.frame` <- function(x, value) `.rowNamesDF<-`(x, value=value) 120 121##_H `row.names<-.default` <- function(x, ..., value) `rownames<-`(x, value) 122`row.names<-.default` <- function(x, value) `rownames<-`(x, value) 123 124is.na.data.frame <- function (x) 125{ 126 ## need to special-case no columns 127 y <- if (length(x)) { 128 do.call("cbind", lapply(x, "is.na")) # gives a matrix 129 } else matrix(FALSE, length(row.names(x)), 0) 130 if(.row_names_info(x) > 0L) rownames(y) <- row.names(x) 131 y 132} 133 134## Provide for efficiency reasons (PR#17600): 135anyNA.data.frame <- function(x, recursive = FALSE) 136 any(vapply(x, anyNA, NA, USE.NAMES = FALSE)) 137 138is.data.frame <- function(x) inherits(x, "data.frame") 139 140## as fast as possible; used also for subsetting 141I <- function(x) { class(x) <- unique.default(c("AsIs", oldClass(x))); x } 142 143print.AsIs <- function (x, ...) 144{ 145 cl <- oldClass(x) 146 oldClass(x) <- cl[cl != "AsIs"] 147 NextMethod("print") 148 invisible(x) 149} 150 151 152t.data.frame <- function(x) 153{ 154 x <- as.matrix(x) 155 NextMethod("t") 156} 157 158dim.data.frame <- function(x) c(.row_names_info(x, 2L), length(x)) 159 160dimnames.data.frame <- function(x) list(row.names(x), names(x)) 161 162`dimnames<-.data.frame` <- function(x, value) 163{ 164 d <- dim(x) 165 if(!is.list(value) || length(value) != 2L) 166 stop("invalid 'dimnames' given for data frame") 167 ## do the coercion first, as might change length 168 value[[1L]] <- as.character(value[[1L]]) 169 value[[2L]] <- as.character(value[[2L]]) 170 if(d[[1L]] != length(value[[1L]]) || d[[2L]] != length(value[[2L]])) 171 stop("invalid 'dimnames' given for data frame") 172 row.names(x) <- value[[1L]] # checks validity 173 names(x) <- value[[2L]] 174 x 175} 176 177as.data.frame <- function(x, row.names = NULL, optional = FALSE, ...) 178{ 179 if(is.null(x)) # can't assign class to NULL 180 return(as.data.frame(list())) 181 UseMethod("as.data.frame") 182} 183 184as.data.frame.default <- function(x, ...) 185 stop(gettextf("cannot coerce class %s to a data.frame", 186 sQuote(deparse(class(x))[1L])), 187 domain = NA) 188 189### Here are methods ensuring that the arguments to "data.frame" 190### are in a form suitable for combining into a data frame. 191 192as.data.frame.data.frame <- function(x, row.names = NULL, ...) 193{ 194 cl <- oldClass(x) 195 i <- match("data.frame", cl) 196 if(i > 1L) 197 class(x) <- cl[ - (1L:(i-1L))] 198 if(!is.null(row.names)){ 199 nr <- .row_names_info(x, 2L) 200 if(length(row.names) == nr) 201 attr(x, "row.names") <- row.names 202 else 203 stop(sprintf(ngettext(nr, 204 "invalid 'row.names', length %d for a data frame with %d row", 205 "invalid 'row.names', length %d for a data frame with %d rows"), 206 length(row.names), nr), domain = NA) 207 } 208 x 209} 210 211## prior to 1.8.0 this coerced names - PR#3280 212as.data.frame.list <- 213 function(x, row.names = NULL, optional = FALSE, ..., 214 cut.names = FALSE, col.names = names(x), fix.empty.names = TRUE, 215 check.names = !optional, 216 stringsAsFactors = FALSE) 217{ 218 ## need to protect names in x. 219 ## truncate any of more than 256 (or cut.names) bytes: 220 new.nms <- !missing(col.names) 221 if(cut.names) { 222 maxL <- if(is.logical(cut.names)) 256L else as.integer(cut.names) 223 if(any(long <- nchar(col.names, "bytes", keepNA = FALSE) > maxL)) 224 col.names[long] <- paste(substr(col.names[long], 1L, maxL - 6L), "...") 225 else cut.names <- FALSE 226 } 227 m <- match(names(formals(data.frame))[-1L], 228 ## c("row.names", "check.rows", ...., "stringsAsFactors"), 229 col.names, 0L) 230 if(any.m <- any(m)) col.names[m] <- paste0("..adfl.", col.names[m]) 231 if(new.nms || any.m || cut.names) names(x) <- col.names 232 ## data.frame() is picky with its 'row.names': 233 alis <- c(list(check.names = check.names, fix.empty.names = fix.empty.names, 234 stringsAsFactors = stringsAsFactors), 235 if(!missing(row.names)) list(row.names = row.names)) 236 x <- do.call(data.frame, c(x, alis)) 237 if(any.m) names(x) <- sub("^\\.\\.adfl\\.", "", names(x)) 238 x 239} 240 241as.data.frame.vector <- function(x, row.names = NULL, optional = FALSE, ..., 242 nm = deparse1(substitute(x))) 243{ 244 force(nm) 245 nrows <- length(x) 246 ## ## row.names -- for now warn about and "forget" illegal row.names 247 ## ## -- can simplify much (move this *after* the is.null(.) case) once we stop() ! 248### FIXME: allow integer [of full length] 249 if(!(is.null(row.names) || (is.character(row.names) && length(row.names) == nrows))) { 250 warning(gettextf( 251 "'row.names' is not a character vector of length %d -- omitting it. Will be an error!", 252 nrows), domain = NA) 253 row.names <- NULL 254 } 255 if(is.null(row.names)) { 256 if (nrows == 0L) 257 row.names <- character() 258 else if(length(row.names <- names(x)) != nrows || anyDuplicated(row.names)) 259 row.names <- .set_row_names(nrows) 260 } 261 ## else if(length(row.names) != nrows) # same behavior as the 'matrix' method 262 ## row.names <- .set_row_names(nrows) 263 if(!is.null(names(x))) names(x) <- NULL # remove names as from 2.0.0 264 value <- list(x) 265 if(!optional) names(value) <- nm 266 structure(value, row.names = row.names, class = "data.frame") 267} 268 269as.data.frame.ts <- function(x, ...) 270{ 271 if(is.matrix(x)) 272 as.data.frame.matrix(x, ...) 273 else 274 as.data.frame.vector(x, ...) 275} 276 277as.data.frame.raw <- as.data.frame.vector 278as.data.frame.factor <- as.data.frame.vector 279as.data.frame.ordered <- as.data.frame.vector 280as.data.frame.integer <- as.data.frame.vector 281as.data.frame.logical <- as.data.frame.vector 282as.data.frame.numeric <- as.data.frame.vector 283as.data.frame.complex <- as.data.frame.vector 284 285 286default.stringsAsFactors <- function() 287{ 288 val <- getOption("stringsAsFactors") 289 if(is.null(val)) val <- FALSE 290 if(!is.logical(val) || is.na(val) || length(val) != 1L) 291 stop('options("stringsAsFactors") not set to TRUE or FALSE') 292 val 293} 294 295## in case someone passes 'nm' 296as.data.frame.character <- 297 function(x, ..., stringsAsFactors = FALSE) 298{ 299 nm <- deparse1(substitute(x)) 300 if(stringsAsFactors) x <- factor(x) 301 if(!"nm" %in% ...names()) 302 as.data.frame.vector(x, ..., nm = nm) 303 else as.data.frame.vector(x, ...) 304} 305 306as.data.frame.matrix <- function(x, row.names = NULL, optional = FALSE, make.names = TRUE, ..., 307 stringsAsFactors = FALSE) 308{ 309 d <- dim(x) 310 nrows <- d[[1L]] 311 ncols <- d[[2L]] 312 ic <- seq_len(ncols) 313 dn <- dimnames(x) 314 ## surely it cannot be right to override the supplied row.names? 315 ## changed in 1.8.0 316 if(is.null(row.names)) row.names <- dn[[1L]] 317 collabs <- dn[[2L]] 318 ## These might be NA 319 if(any(empty <- !nzchar(collabs))) 320 collabs[empty] <- paste0("V", ic)[empty] 321 value <- vector("list", ncols) 322 if(mode(x) == "character" && stringsAsFactors) { 323 for(i in ic) 324 value[[i]] <- as.factor(x[,i]) 325 } else { 326 for(i in ic) 327 value[[i]] <- as.vector(x[,i]) 328 } 329 ## Explicitly check for NULL in case nrows==0 330 autoRN <- (is.null(row.names) || length(row.names) != nrows) 331 if(length(collabs) == ncols) 332 names(value) <- collabs 333 else if(!optional) 334 names(value) <- paste0("V", ic) 335 class(value) <- "data.frame" 336 if(autoRN) 337 attr(value, "row.names") <- .set_row_names(nrows) 338 else 339 .rowNamesDF(value, make.names=make.names) <- row.names 340 value 341} 342 343as.data.frame.model.matrix <- 344 function(x, row.names = NULL, optional = FALSE, make.names = TRUE, ...) 345{ 346 d <- dim(x) 347 nrows <- d[[1L]] 348 dn <- dimnames(x) 349 row.names <- dn[[1L]] 350 value <- list(x) 351 if(!optional) names(value) <- deparse(substitute(x))[[1L]] 352 # FIXME? better: , nlines=1L or deparse1(.) 353 class(value) <- "data.frame" 354 if(!is.null(row.names)) { 355 row.names <- as.character(row.names) 356 if(length(row.names) != nrows) 357 stop(sprintf(ngettext(length(row.names), 358 "supplied %d row name for %d rows", 359 "supplied %d row names for %d rows"), 360 length(row.names), nrows), domain = NA) 361 .rowNamesDF(value, make.names=make.names) <- row.names 362 } 363 else attr(value, "row.names") <- .set_row_names(nrows) 364 value 365} 366 367as.data.frame.array <- function(x, row.names = NULL, optional = FALSE, ...) 368{ 369 d <- dim(x) 370 if(length(d) == 1L) { ## same as as.data.frame.vector, but deparsed here 371 ## c(): better than drop() or as.vector() ! 372 value <- as.data.frame.vector( c(x), row.names, optional, ...) 373 if(!optional) names(value) <- deparse(substitute(x))[[1L]] 374 # FIXME? better: , nlines=1L or deparse1(.) 375 value 376 } else if (length(d) == 2L) { 377 ## for explicit "array" class; otherwise *.matrix() is dispatched 378 as.data.frame.matrix(x, row.names, optional, ...) 379 } else { 380 dn <- dimnames(x) 381 dim(x) <- c(d[1L], prod(d[-1L])) 382 if(!is.null(dn)) { 383 if(length(dn[[1L]])) rownames(x) <- dn[[1L]] 384 for(i in 2L:length(d)) 385 if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) 386 colnames(x) <- interaction(expand.grid(dn[-1L])) 387 } 388 as.data.frame.matrix(x, row.names, optional, ...) 389 } 390} 391 392## Allow extraction method to have changed the underlying class, 393## so re-assign the class based on the result. 394`[.AsIs` <- function(x, i, ...) I(NextMethod("[")) 395 396 397## NB: this is called relatively often from data.frame() itself, ... 398as.data.frame.AsIs <- function(x, row.names = NULL, optional = FALSE, ...) 399{ 400 if(length(dim(x)) == 2L) 401 as.data.frame.model.matrix(x, row.names, optional) 402 else { # as.data.frame.vector without removing names 403 nrows <- length(x) 404 nm <- deparse1(substitute(x)) 405 if(is.null(row.names)) { 406 autoRN <- FALSE 407 if (nrows == 0L) 408 row.names <- character() 409 else if(length(row.names <- names(x)) == nrows && 410 !anyDuplicated(row.names)) { 411 } 412 else { 413 autoRN <- TRUE 414 row.names <- .set_row_names(nrows) 415 } 416 } else 417 autoRN <- is.integer(row.names) && length(row.names) == 2L && 418 is.na(rn1 <- row.names[[1L]]) && rn1 < 0 419 value <- list(x) 420 if(!optional) names(value) <- nm 421 class(value) <- "data.frame" 422 ## FIXME -- Need to comment the 'row.names(.) <-' case 423 ## if(autoRN) 424 attr(value, "row.names") <- row.names 425 ## else 426 ## row.names(value) <- row.names 427 value 428 } 429 430} 431 432### This is the real "data.frame". 433### It does everything by calling the methods presented above. 434 435data.frame <- 436 function(..., row.names = NULL, check.rows = FALSE, check.names = TRUE, 437 fix.empty.names = TRUE, 438 stringsAsFactors = FALSE) 439{ 440 data.row.names <- 441 if(check.rows && is.null(row.names)) 442 function(current, new, i) { 443 if(is.character(current)) new <- as.character(new) 444 if(is.character(new)) current <- as.character(current) 445 if(anyDuplicated(new)) 446 return(current) 447 if(is.null(current)) 448 return(new) 449 if(all(current == new) || all(current == "")) 450 return(new) 451 stop(gettextf( 452 "mismatch of row names in arguments of 'data.frame\', item %d", i), 453 domain = NA) 454 } 455 else function(current, new, i) { 456 if(is.null(current)) { 457 if(anyDuplicated(new)) { 458 warning(gettextf( 459 "some row.names duplicated: %s --> row.names NOT used", 460 paste(which(duplicated(new)), collapse=",")), 461 domain = NA) 462 current 463 } else new 464 } else current 465 } 466 object <- as.list(substitute(list(...)))[-1L] 467 mirn <- missing(row.names) # record before possibly changing 468 mrn <- is.null(row.names) # missing or NULL 469 x <- list(...) 470 n <- length(x) 471 if(n < 1L) { 472 if(!mrn) { 473 if(is.object(row.names) || !is.integer(row.names)) 474 row.names <- as.character(row.names) 475 if(anyNA(row.names)) 476 stop("row names contain missing values") 477 if(anyDuplicated(row.names)) 478 stop(gettextf("duplicate row.names: %s", 479 paste(unique(row.names[duplicated(row.names)]), 480 collapse = ", ")), 481 domain = NA) 482 } else row.names <- integer() 483 return(structure(list(), names = character(), 484 row.names = row.names, 485 class = "data.frame")) 486 } 487 vnames <- names(x) 488 if(length(vnames) != n) 489 vnames <- character(n) 490 no.vn <- !nzchar(vnames) 491 vlist <- vnames <- as.list(vnames) 492 nrows <- ncols <- integer(n) 493 for(i in seq_len(n)) { 494 ## do it this way until all as.data.frame methods have been updated 495 xi <- if(is.character(x[[i]]) || is.list(x[[i]])) 496 as.data.frame(x[[i]], optional = TRUE, 497 stringsAsFactors = stringsAsFactors) 498 else as.data.frame(x[[i]], optional = TRUE) 499 500 nrows[i] <- .row_names_info(xi) # signed for now 501 ncols[i] <- length(xi) 502 namesi <- names(xi) 503 if(ncols[i] > 1L) { 504 if(length(namesi) == 0L) namesi <- seq_len(ncols[i]) 505 vnames[[i]] <- if(no.vn[i]) namesi 506 else paste(vnames[[i]], namesi, sep=".") 507 } else if(length(namesi)) { 508 vnames[[i]] <- namesi 509 } else if (fix.empty.names && no.vn[[i]]) { 510 tmpname <- deparse(object[[i]], nlines = 1L)[1L] 511 if(startsWith(tmpname, "I(") && endsWith(tmpname, ")")) { 512 ## from 'I(*)', only keep '*': 513 ntmpn <- nchar(tmpname, "c") 514 tmpname <- substr(tmpname, 3L, ntmpn - 1L) 515 } 516 vnames[[i]] <- tmpname 517 } ## else vnames[[i]] are not changed 518 if(mirn && nrows[i] > 0L) { 519 rowsi <- attr(xi, "row.names") 520 ## Avoid all-blank names 521 if(any(nzchar(rowsi))) 522 row.names <- data.row.names(row.names, rowsi, i) 523 } 524 nrows[i] <- abs(nrows[i]) 525 vlist[[i]] <- xi 526 } 527 nr <- max(nrows) 528 for(i in seq_len(n)[nrows < nr]) { 529 xi <- vlist[[i]] 530 if(nrows[i] > 0L && (nr %% nrows[i] == 0L)) { 531 ## make some attempt to recycle column i 532 xi <- unclass(xi) # avoid data-frame methods 533 fixed <- TRUE 534 for(j in seq_along(xi)) { 535 xi1 <- xi[[j]] 536 if(is.vector(xi1) || is.factor(xi1)) 537 xi[[j]] <- rep(xi1, length.out = nr) 538 else if(is.character(xi1) && inherits(xi1, "AsIs")) 539 xi[[j]] <- structure(rep(xi1, length.out = nr), 540 class = class(xi1)) 541 else if(inherits(xi1, "Date") || inherits(xi1, "POSIXct")) 542 xi[[j]] <- rep(xi1, length.out = nr) 543 else { 544 fixed <- FALSE 545 break 546 } 547 } 548 if (fixed) { 549 vlist[[i]] <- xi 550 next 551 } 552 } 553 stop(gettextf("arguments imply differing number of rows: %s", 554 paste(unique(nrows), collapse = ", ")), 555 domain = NA) 556 } 557 value <- unlist(vlist, recursive=FALSE, use.names=FALSE) 558 ## unlist() drops i-th component if it has 0 columns 559 vnames <- as.character(unlist(vnames[ncols > 0L])) 560 if(fix.empty.names && any(noname <- !nzchar(vnames))) 561 vnames[noname] <- paste0("Var.", seq_along(vnames))[noname] 562 if(check.names) { 563 if(fix.empty.names) 564 vnames <- make.names(vnames, unique=TRUE) 565 else { ## do not fix "" 566 nz <- nzchar(vnames) 567 vnames[nz] <- make.names(vnames[nz], unique=TRUE) 568 } 569 } 570 names(value) <- vnames 571 if(!mrn) { # non-null row.names arg was supplied 572 if(length(row.names) == 1L && nr != 1L) { # one of the variables 573 if(is.character(row.names)) 574 row.names <- match(row.names, vnames, 0L) 575 if(length(row.names) != 1L || 576 row.names < 1L || row.names > length(vnames)) 577 stop("'row.names' should specify one of the variables") 578 i <- row.names 579 row.names <- value[[i]] 580 value <- value[ - i] 581 } else if ( !is.null(row.names) && length(row.names) != nr ) 582 stop("row names supplied are of the wrong length") 583 } else if( !is.null(row.names) && length(row.names) != nr ) { 584 warning("row names were found from a short variable and have been discarded") 585 row.names <- NULL 586 } 587 class(value) <- "data.frame" 588 if(is.null(row.names)) 589 attr(value, "row.names") <- .set_row_names(nr) #seq_len(nr) 590 else { 591 if(is.object(row.names) || !is.integer(row.names)) 592 row.names <- as.character(row.names) 593 if(anyNA(row.names)) 594 stop("row names contain missing values") 595 if(anyDuplicated(row.names)) 596 stop(gettextf("duplicate row.names: %s", 597 paste(unique(row.names[duplicated(row.names)]), 598 collapse = ", ")), 599 domain = NA) 600 row.names(value) <- row.names 601 } 602 value 603} 604 605 606### Subsetting and mutation methods 607### These are a little less general than S 608 609`[.data.frame` <- 610 function(x, i, j, drop = if(missing(i)) TRUE else length(cols) == 1) 611{ 612 mdrop <- missing(drop) 613 Narg <- nargs() - !mdrop # number of arg from x,i,j that were specified 614 has.j <- !missing(j) 615 if(!all(names(sys.call()) %in% c("", "drop")) 616 && !isS4(x)) # at least don't warn for callNextMethod! 617 warning("named arguments other than 'drop' are discouraged") 618 619 if(Narg < 3L) { # list-like indexing or matrix indexing 620 if(!mdrop) warning("'drop' argument will be ignored") 621 if(missing(i)) return(x) 622 if(is.matrix(i)) 623 return(as.matrix(x)[i]) # desperate measures 624 ## zero-column data frames prior to 2.4.0 had no names. 625 nm <- names(x); if(is.null(nm)) nm <- character() 626 ## if we have NA names, character indexing should always fail 627 ## (for positive index length) 628 if(!is.character(i) && anyNA(nm)) { # less efficient version 629 names(nm) <- names(x) <- seq_along(x) 630 y <- NextMethod("[") 631 cols <- names(y) 632 if(anyNA(cols)) stop("undefined columns selected") 633 cols <- names(y) <- nm[cols] 634 } else { 635 y <- NextMethod("[") 636 cols <- names(y) 637 if(!is.null(cols) && anyNA(cols)) 638 stop("undefined columns selected") 639 } 640 ## added in 1.8.0 641 if(anyDuplicated(cols)) names(y) <- make.unique(cols) 642 ## since we have not touched the rows, copy over the raw row.names 643 ## Claimed at one time at least one fewer copies: PR#15274 644 attr(y, "row.names") <- .row_names_info(x, 0L) 645 attr(y, "class") <- oldClass(x) 646 return(y) 647 } 648 649 if(missing(i)) { # df[, j] or df[ , ] 650 ## not quite the same as the 1/2-arg case, as 'drop' is used. 651 if(drop && !has.j && length(x) == 1L) return(.subset2(x, 1L)) 652 nm <- names(x); if(is.null(nm)) nm <- character() 653 if(has.j && !is.character(j) && anyNA(nm)) { 654 ## less efficient version 655 names(nm) <- names(x) <- seq_along(x) 656 y <- .subset(x, j) 657 cols <- names(y) 658 if(anyNA(cols)) stop("undefined columns selected") 659 cols <- names(y) <- nm[cols] 660 } else { 661 y <- if(has.j) .subset(x, j) else x 662 cols <- names(y) 663 if(anyNA(cols)) stop("undefined columns selected") 664 } 665 if(drop && length(y) == 1L) return(.subset2(y, 1L)) 666 if(anyDuplicated(cols)) names(y) <- make.unique(cols) 667 nrow <- .row_names_info(x, 2L) 668 if(drop && !mdrop && nrow == 1L) 669 return(structure(y, class = NULL, row.names = NULL)) 670 else { 671 ## Claimed at one time at least one fewer copies: PR#15274 672 attr(y, "class") <- oldClass(x) 673 attr(y, "row.names") <- .row_names_info(x, 0L) 674 return(y) 675 } 676 } 677 678 ### df[i, j] or df[i , ] 679 ## rewritten for R 2.5.0 to avoid duplicating x. 680 xx <- x 681 cols <- names(xx) # needed for computation of 'drop' arg 682 ## make a shallow copy 683 x <- vector("list", length(x)) 684 ## attributes(x) <- attributes(xx) expands row names 685 x <- .Internal(copyDFattr(xx, x)) 686 oldClass(x) <- attr(x, "row.names") <- NULL 687 688 if(has.j) { # df[i, j] 689 nm <- names(x); if(is.null(nm)) nm <- character() 690 if(!is.character(j) && anyNA(nm)) 691 names(nm) <- names(x) <- seq_along(x) 692 x <- x[j] 693 cols <- names(x) # needed for 'drop' 694 if(drop && length(x) == 1L) { 695 ## for consistency with [, <length-1>] 696 if(is.character(i)) { 697 rows <- attr(xx, "row.names") 698 i <- pmatch(i, rows, duplicates.ok = TRUE) 699 } 700 ## need to figure which col was selected: 701 ## cannot use .subset2 directly as that may 702 ## use recursive selection for a logical index. 703 xj <- .subset2(.subset(xx, j), 1L) 704 return(if(length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE]) 705 } 706 if(anyNA(cols)) stop("undefined columns selected") 707 ## fix up names if we altered them. 708 if(!is.null(names(nm))) cols <- names(x) <- nm[cols] 709 ## sxx <- match(cols, names(xx)) fails with duplicate names 710 nxx <- structure(seq_along(xx), names=names(xx)) 711 sxx <- match(nxx[j], seq_along(xx)) 712 } else sxx <- seq_along(x) 713 714 rows <- NULL # placeholder: only create row names when needed 715 # as this can be expensive. 716 if(is.character(i)) { 717 rows <- attr(xx, "row.names") 718 i <- pmatch(i, rows, duplicates.ok = TRUE) 719 } 720 for(j in seq_along(x)) { 721 xj <- xx[[ sxx[j] ]] 722 ## had drop = drop prior to 1.8.0 723 x[[j]] <- if(length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE] 724 } 725 726 if(drop) { 727 n <- length(x) 728 if(n == 1L) return(x[[1L]]) # drops attributes 729 if(n > 1L) { 730 xj <- x[[1L]] 731 nrow <- if(length(dim(xj)) == 2L) dim(xj)[1L] else length(xj) 732 ## for consistency with S: don't drop (to a list) 733 ## if only one row, unless explicitly asked for 734 drop <- !mdrop && nrow == 1L 735 } else drop <- FALSE ## for n == 0 736 } 737 738 if(!drop) { # not else as previous section might reset drop 739 ## row names might have NAs. 740 if(is.null(rows)) rows <- attr(xx, "row.names") 741 rows <- rows[i] 742 if((ina <- anyNA(rows)) | (dup <- anyDuplicated(rows))) { 743 ## both will coerce integer 'rows' to character: 744 if (!dup && is.character(rows)) dup <- "NA" %in% rows 745 if(ina) 746 rows[is.na(rows)] <- "NA" 747 if(dup) 748 rows <- make.unique(as.character(rows)) 749 } 750 ## new in 1.8.0 -- might have duplicate columns 751 if(has.j && anyDuplicated(nm <- names(x))) 752 names(x) <- make.unique(nm) 753 if(is.null(rows)) rows <- attr(xx, "row.names")[i] 754 attr(x, "row.names") <- rows 755 oldClass(x) <- oldClass(xx) 756 } 757 x 758} 759 760`[[.data.frame` <- function(x, ..., exact=TRUE) 761{ 762 ## use in-line functions to refer to the 1st and 2nd ... arguments 763 ## explicitly. Also will check for wrong number or empty args 764 na <- nargs() - !missing(exact) 765 if(!all(names(sys.call()) %in% c("", "exact"))) 766 warning("named arguments other than 'exact' are discouraged") 767 768 if(na < 3L) 769 (function(x, i, exact) 770 if(is.matrix(i)) as.matrix(x)[[i]] 771 else .subset2(x, i, exact=exact))(x, ..., exact=exact) 772 else { 773 col <- .subset2(x, ..2, exact=exact) 774 i <- if(is.character(..1)) 775 pmatch(..1, row.names(x), duplicates.ok = TRUE) 776 else ..1 777 ## we do want to dispatch on methods for a column. 778 ## .subset2(col, i, exact=exact) 779 col[[i, exact = exact]] 780 } 781} 782 783`[<-.data.frame` <- function(x, i, j, value) 784{ 785 if(!all(names(sys.call()) %in% c("", "value"))) 786 warning("named arguments are discouraged") 787 788 nA <- nargs() # 'value' is never missing, so 3 or 4. 789 if(nA == 4L) { ## df[,] or df[i,] or df[, j] or df[i,j] 790 has.i <- !missing(i) 791 has.j <- !missing(j) 792 } 793 else if(nA == 3L) { 794 ## this collects both df[] and df[ind] 795 if (is.atomic(value) && !is.null(names(value))) 796 names(value) <- NULL 797 if(missing(i) && missing(j)) { # case df[] 798 i <- j <- NULL 799 has.i <- has.j <- FALSE 800 ## added in 1.8.0 801 if(is.null(value)) return(x[logical()]) 802 } else { # case df[ind] 803 ## really ambiguous, but follow common use as if list 804 ## except for two column numeric matrix or full-sized logical matrix 805 if(is.numeric(i) && is.matrix(i) && ncol(i) == 2) { 806 # Rewrite i as a logical index 807 index <- rep.int(FALSE, prod(dim(x))) 808 dim(index) <- dim(x) 809 tryCatch(index[i] <- TRUE, 810 error = function(e) stop(conditionMessage(e), call.=FALSE)) 811 # Put values in the right order 812 o <- order(i[,2], i[,1]) 813 N <- length(value) 814 if (length(o) %% N != 0L) 815 warning("number of items to replace is not a multiple of replacement length") 816 if (N < length(o)) 817 value <- rep(value, length.out=length(o)) 818 value <- value[o] 819 i <- index 820 } 821 if(is.logical(i) && is.matrix(i) && all(dim(i) == dim(x))) { 822 nreplace <- sum(i, na.rm=TRUE) 823 if(!nreplace) return(x) # nothing to replace 824 ## allow replication of length(value) > 1 in 1.8.0 825 N <- length(value) 826 if(N > 1L && N < nreplace && (nreplace %% N) == 0L) 827 value <- rep(value, length.out = nreplace) 828 if(N > 1L && (length(value) != nreplace)) 829 stop("'value' is the wrong length") 830 n <- 0L 831 nv <- nrow(x) 832 for(v in seq_len(dim(i)[2L])) { 833 thisvar <- i[, v, drop = TRUE] 834 nv <- sum(thisvar, na.rm = TRUE) 835 if(nv) { 836 if(is.matrix(x[[v]])) 837 x[[v]][thisvar, ] <- if(N > 1L) value[n+seq_len(nv)] else value 838 else 839 x[[v]][thisvar] <- if(N > 1L) value[n+seq_len(nv)] else value 840 } 841 n <- n+nv 842 } 843 return(x) 844 } # end of logical matrix 845 if(is.matrix(i)) 846 stop("unsupported matrix index in replacement") 847 j <- i 848 i <- NULL 849 has.i <- FALSE 850 has.j <- TRUE 851 } 852 } 853 else # nargs() <= 2 854 stop("need 0, 1, or 2 subscripts") 855 856 if ((has.j && !length(j)) || # "no", i.e. empty columns specified 857 (has.i && !length(i) && !has.j))# empty rows and no col. specified 858 return(x) 859 860 cl <- oldClass(x) 861 ## delete class: S3 idiom to avoid any special methods for [[, etc 862 class(x) <- NULL 863 new.cols <- NULL 864 nvars <- length(x) 865 nrows <- .row_names_info(x, 2L) 866 if(has.i && length(i)) { # df[i, ] or df[i, j] 867 rows <- NULL # indicator that it is not yet set 868 if(anyNA(i)) 869 stop("missing values are not allowed in subscripted assignments of data frames") 870 if(char.i <- is.character(i)) { 871 rows <- attr(x, "row.names") 872 ii <- match(i, rows) 873 nextra <- sum(new.rows <- is.na(ii)) 874 if(nextra > 0L) { 875 ii[new.rows] <- seq.int(from = nrows + 1L, length.out = nextra) 876 new.rows <- i[new.rows] 877 } 878 i <- ii 879 } 880 if(!is.logical(i) && 881 (char.i && nextra || all(i >= 0L) && (nn <- max(i)) > nrows)) { 882 ## expand 883 if(is.null(rows)) rows <- attr(x, "row.names") 884 if(!char.i) { 885 nrr <- (nrows + 1L):nn 886 if(inherits(value, "data.frame") && 887 (dim(value)[1L]) >= length(nrr)) { 888 new.rows <- attr(value, "row.names")[seq_along(nrr)] 889 repl <- duplicated(new.rows) | match(new.rows, rows, 0L) 890 if(any(repl)) new.rows[repl] <- nrr[repl] 891 } 892 else new.rows <- nrr 893 } 894 x <- xpdrows.data.frame(x, rows, new.rows) 895 rows <- attr(x, "row.names") 896 nrows <- length(rows) 897 } 898 iseq <- seq_len(nrows)[i] 899 if(anyNA(iseq)) stop("non-existent rows not allowed") 900 } 901 else iseq <- NULL 902 903 if(has.j) { 904 if(anyNA(j)) 905 stop("missing values are not allowed in subscripted assignments of data frames") 906 if(is.character(j)) { 907 if("" %in% j) stop("column name \"\" cannot match any column") 908 jseq <- match(j, names(x)) 909 if(anyNA(jseq)) { 910 n <- is.na(jseq) 911 jseq[n] <- nvars + seq_len(sum(n)) 912 new.cols <- j[n] 913 } 914 } 915 else if(is.logical(j) || min(j) < 0L) 916 jseq <- seq_along(x)[j] 917 else { 918 jseq <- j 919 if(max(jseq) > nvars) { 920 new.cols <- paste0("V", 921 seq.int(from = nvars + 1L, to = max(jseq))) 922 if(length(new.cols) != sum(jseq > nvars)) 923 stop("new columns would leave holes after existing columns") 924 ## try to use the names of a list `value' 925 if(is.list(value) && !is.null(vnm <- names(value))) { 926 p <- length(jseq) 927 if(length(vnm) < p) vnm <- rep_len(vnm, p) 928 new.cols <- vnm[jseq > nvars] 929 } 930 } 931 } 932 } 933 else jseq <- seq_along(x) 934 935 ## empty rows and not (a *new* column as in d[FALSE, "new"] <- val ) : 936 if(has.i && !length(iseq) && all(1L <= jseq & jseq <= nvars)) 937 return(`class<-`(x, cl)) 938 939 ## addition in 1.8.0 940 if(anyDuplicated(jseq)) 941 stop("duplicate subscripts for columns") 942 n <- length(iseq) 943 if(n == 0L) n <- nrows 944 p <- length(jseq) 945 if (is.null(value)) { 946 value <- list(NULL) 947 } 948 m <- length(value) 949 if(!is.list(value)) { 950 if(p == 1L) { 951 N <- NROW(value) 952 if(N > n) 953 stop(sprintf(ngettext(N, 954 "replacement has %d row, data has %d", 955 "replacement has %d rows, data has %d"), 956 N, n), domain = NA) 957 if(N < n && N > 0L) 958 if(n %% N == 0L && length(dim(value)) <= 1L) 959 value <- rep(value, length.out = n) 960 else 961 stop(sprintf(ngettext(N, 962 "replacement has %d row, data has %d", 963 "replacement has %d rows, data has %d"), 964 N, nrows), domain = NA) 965 if (!is.null(names(value))) names(value) <- NULL 966 value <- list(value) 967 } else { 968 if(m < n*p && (m == 0L || (n*p) %% m)) 969 stop(sprintf(ngettext(m, 970 "replacement has %d item, need %d", 971 "replacement has %d items, need %d"), 972 m, n*p), domain = NA) 973 value <- matrix(value, n, p) ## will recycle 974 ## <FIXME split.matrix> 975 value <- split(c(value), col(value)) 976 } 977 dimv <- c(n, p) 978 } else { # a list 979 ## careful, as.data.frame turns things into factors. 980 ## value <- as.data.frame(value) 981 value <- unclass(value) # to avoid data frame indexing 982 lens <- vapply(value, NROW, 1L) 983 for(k in seq_along(lens)) { 984 N <- lens[k] 985 if(n != N && length(dim(value[[k]])) == 2L) 986 stop(sprintf(ngettext(N, 987 "replacement element %d is a matrix/data frame of %d row, need %d", 988 "replacement element %d is a matrix/data frame of %d rows, need %d"), 989 k, N, n), 990 domain = NA) 991 if(N > 0L && N < n && n %% N) 992 stop(sprintf(ngettext(N, 993 "replacement element %d has %d row, need %d", 994 "replacement element %d has %d rows, need %d"), 995 k, N, n), domain = NA) 996 ## these fixing-ups will not work for matrices 997 if(N > 0L && N < n) value[[k]] <- rep(value[[k]], length.out = n) 998 if(N > n) { 999 warning(sprintf(ngettext(N, 1000 "replacement element %d has %d row to replace %d rows", 1001 "replacement element %d has %d rows to replace %d rows"), 1002 k, N, n), domain = NA) 1003 value[[k]] <- value[[k]][seq_len(n)] 1004 } 1005 } 1006 dimv <- c(n, length(value)) 1007 } 1008 nrowv <- dimv[1L] 1009 if(nrowv < n && nrowv > 0L) { 1010 if(n %% nrowv == 0L) 1011 value <- value[rep_len(seq_len(nrowv), n),,drop = FALSE] 1012 else 1013 stop(sprintf(ngettext(nrowv, 1014 "%d row in value to replace %d rows", 1015 "%d rows in value to replace %d rows"), 1016 nrowv, n), domain = NA) 1017 } 1018 else if(nrowv > n) 1019 warning(sprintf(ngettext(nrowv, 1020 "replacement data has %d row to replace %d rows", 1021 "replacement data has %d rows to replace %d rows"), 1022 nrowv, n), domain = NA) 1023 ncolv <- dimv[2L] 1024 jvseq <- seq_len(p) 1025 if(ncolv < p) jvseq <- rep_len(seq_len(ncolv), p) 1026 else if(p != 0L && ncolv > p) { 1027 warning(sprintf(ngettext(ncolv, 1028 "provided %d variable to replace %d variables", 1029 "provided %d variables to replace %d variables"), 1030 ncolv, p), domain = NA) 1031 new.cols <- new.cols[seq_len(p)] 1032 } 1033 if(length(new.cols)) { 1034 ## extend and name now, as assignment of NULL may delete cols later. 1035 nm <- names(x) 1036 rows <- .row_names_info(x, 0L) 1037 a <- attributes(x); a["names"] <- NULL 1038 x <- c(x, vector("list", length(new.cols))) 1039 attributes(x) <- a 1040 names(x) <- c(nm, new.cols) 1041 attr(x, "row.names") <- rows 1042 } 1043 if(has.i) 1044 for(jjj in seq_len(p)) { 1045 jj <- jseq[jjj] 1046 vjj <- value[[ jvseq[[jjj]] ]] 1047 if(jj <= nvars) { 1048 ## if a column exists, preserve its attributes 1049 if(length(dim(x[[jj]])) != 2L) 1050 x[[jj]][iseq ] <- vjj 1051 else x[[jj]][iseq, ] <- vjj 1052 } else { 1053 ## try to make a new column match in length: may be an error 1054 x[[jj]] <- vjj[FALSE] 1055 if(length(dim(vjj)) == 2L) { 1056 length(x[[jj]]) <- nrows * ncol(vjj) 1057 dim(x[[jj]]) <- c(nrows, ncol(vjj)) 1058 x[[jj]][iseq, ] <- vjj 1059 } else { 1060 length(x[[jj]]) <- nrows 1061 x[[jj]][iseq] <- vjj 1062 } 1063 } 1064 } 1065 else if(p > 0L) 1066 for(jjj in p:1L) { # we might delete columns with NULL 1067 ## ... and for that reason, we'd better ensure that jseq is increasing! 1068 o <- order(jseq) 1069 jseq <- jseq[o] 1070 jvseq <- jvseq[o] 1071 1072 jj <- jseq[jjj] 1073 v <- value[[ jvseq[[jjj]] ]] 1074 ## This is consistent with the have.i case rather than with 1075 ## [[<- and $<- (which throw an error). But both are plausible. 1076 if (!is.null(v) && nrows > 0L && !length(v)) length(v) <- nrows 1077 x[[jj]] <- v 1078 if (!is.null(v) && is.atomic(x[[jj]]) && !is.null(names(x[[jj]]))) 1079 names(x[[jj]]) <- NULL 1080 } 1081 if(length(new.cols) > 0L) { 1082 new.cols <- names(x) # we might delete columns with NULL 1083 ## added in 1.8.0 1084 if(anyDuplicated(new.cols)) names(x) <- make.unique(new.cols) 1085 } 1086 class(x) <- cl 1087 x 1088} 1089 1090`[[<-.data.frame` <- function(x, i, j, value) 1091{ 1092 if(!all(names(sys.call()) %in% c("", "value"))) 1093 warning("named arguments are discouraged") 1094 1095 cl <- oldClass(x) 1096 ## delete class: Version 3 idiom 1097 ## to avoid any special methods for [[<- 1098 class(x) <- NULL 1099 nrows <- .row_names_info(x, 2L) 1100 if(is.atomic(value) && !is.null(names(value))) names(value) <- NULL 1101 if(nargs() < 4L) { 1102 ## really ambiguous, but follow common use as if list 1103 nc <- length(x) 1104 if(!is.null(value)) { 1105 N <- NROW(value) 1106 if(N > nrows) 1107 stop(sprintf(ngettext(N, 1108 "replacement has %d row, data has %d", 1109 "replacement has %d rows, data has %d"), 1110 N, nrows), domain = NA) 1111 if(N < nrows) 1112 if(N > 0L && (nrows %% N == 0L) && length(dim(value)) <= 1L) 1113 value <- rep(value, length.out = nrows) 1114 else 1115 stop(sprintf(ngettext(N, 1116 "replacement has %d row, data has %d", 1117 "replacement has %d rows, data has %d"), 1118 N, nrows), domain = NA) 1119 } 1120 x[[i]] <- value 1121 ## added in 1.8.0 -- make sure there is a name 1122 if(length(x) > nc) { 1123 nc <- length(x) 1124 if(names(x)[nc] == "") names(x)[nc] <- paste0("V", nc) 1125 names(x) <- make.unique(names(x)) 1126 } 1127 class(x) <- cl 1128 return(x) 1129 } 1130 if(missing(i) || missing(j)) 1131 stop("only valid calls are x[[j]] <- value or x[[i,j]] <- value") 1132 rows <- attr(x, "row.names") 1133 nvars <- length(x) 1134 if(n <- is.character(i)) { 1135 ii <- match(i, rows) 1136 n <- sum(new.rows <- is.na(ii)) 1137 if(n > 0L) { 1138 ii[new.rows] <- seq.int(from = nrows + 1L, length.out = n) 1139 new.rows <- i[new.rows] 1140 } 1141 i <- ii 1142 } 1143 if(all(i >= 0L) && (nn <- max(i)) > nrows) { 1144 ## expand 1145 if(n == 0L) { 1146 nrr <- (nrows + 1L):nn 1147 if(inherits(value, "data.frame") && 1148 (dim(value)[1L]) >= length(nrr)) { 1149 new.rows <- attr(value, "row.names")[seq_len(nrr)] 1150 repl <- duplicated(new.rows) | match(new.rows, rows, 0L) 1151 if(any(repl)) new.rows[repl] <- nrr[repl] 1152 } 1153 else new.rows <- nrr 1154 } 1155 x <- xpdrows.data.frame(x, rows, new.rows) 1156 rows <- attr(x, "row.names") 1157 nrows <- length(rows) 1158 } 1159 1160 ## FIXME: this is wasteful and probably unnecessary 1161 iseq <- seq_len(nrows)[i] 1162 if(anyNA(iseq)) 1163 stop("non-existent rows not allowed") 1164 1165 if(is.character(j)) { 1166 if("" %in% j) stop("column name \"\" cannot match any column") 1167 jseq <- match(j, names(x)) 1168 if(anyNA(jseq)) 1169 stop(gettextf("replacing element in non-existent column: %s", 1170 j[is.na(jseq)]), domain = NA) 1171 } 1172 else if(is.logical(j) || min(j) < 0L) 1173 jseq <- seq_along(x)[j] 1174 else { 1175 jseq <- j 1176 if(max(jseq) > nvars) 1177 stop(gettextf("replacing element in non-existent column: %s", 1178 jseq[jseq > nvars]), domain = NA) 1179 } 1180 if(length(iseq) > 1L || length(jseq) > 1L) 1181 stop("only a single element should be replaced") 1182 x[[jseq]][[iseq]] <- value 1183 class(x) <- cl 1184 x 1185} 1186 1187## added in 1.8.0 1188`$<-.data.frame` <- function(x, name, value) 1189{ 1190 cl <- oldClass(x) 1191 ## delete class: Version 3 idiom 1192 ## to avoid any special methods for [[<- 1193 ## This forces a copy, but we are going to need one anyway 1194 ## and NAMED=1 prevents any further copying. 1195 class(x) <- NULL 1196 nrows <- .row_names_info(x, 2L) 1197 if(!is.null(value)) { 1198 N <- NROW(value) 1199 if(N > nrows) 1200 stop(sprintf(ngettext(N, 1201 "replacement has %d row, data has %d", 1202 "replacement has %d rows, data has %d"), 1203 N, nrows), domain = NA) 1204 if (N < nrows) 1205 if (N > 0L && (nrows %% N == 0L) && length(dim(value)) <= 1L) 1206 value <- rep(value, length.out = nrows) 1207 else 1208 stop(sprintf(ngettext(N, 1209 "replacement has %d row, data has %d", 1210 "replacement has %d rows, data has %d"), 1211 N, nrows), domain = NA) 1212 if(is.atomic(value) && !is.null(names(value))) names(value) <- NULL 1213 } 1214 x[[name]] <- value 1215 class(x) <- cl 1216 return(x) 1217} 1218 1219 1220xpdrows.data.frame <- function(x, old.rows, new.rows) 1221{ 1222 nc <- length(x) 1223 nro <- length(old.rows) 1224 nrn <- length(new.rows) 1225 nr <- nro + nrn 1226 for (i in seq_len(nc)) { 1227 y <- x[[i]] 1228 dy <- dim(y) 1229 cy <- oldClass(y) 1230 class(y) <- NULL 1231 if (length(dy) == 2L) { 1232 dny <- dimnames(y) 1233 if (length(dny[[1L]]) > 0L) 1234 dny[[1L]] <- c(dny[[1L]], new.rows) 1235 z <- array(y[1L], dim = c(nr, nc), dimnames = dny) 1236 z[seq_len(nro), ] <- y 1237 class(z) <- cy 1238 x[[i]] <- z 1239 } 1240 else { 1241 ay <- attributes(y) 1242 if (length(names(y)) > 0L) 1243 ay$names <- c(ay$names, new.rows) 1244 length(y) <- nr 1245 attributes(y) <- ay 1246 class(y) <- cy 1247 x[[i]] <- y 1248 } 1249 } 1250 nm <- c(old.rows, new.rows) 1251 if (any(duplicated(nm))) nm <- make.unique(as.character(nm)) 1252 attr(x, "row.names") <- nm 1253 x 1254} 1255 1256 1257### Here are the methods for rbind and cbind. 1258 1259cbind.data.frame <- function(..., deparse.level = 1) 1260 data.frame(..., check.names = FALSE) 1261 1262rbind.data.frame <- function(..., deparse.level = 1, make.row.names = TRUE, 1263 stringsAsFactors = FALSE, 1264 factor.exclude = TRUE) 1265{ 1266 match.names <- function(clabs, nmi) 1267 { 1268 if(identical(clabs, nmi)) NULL 1269 else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) { 1270 ## we need 1-1 matches here 1271 m <- pmatch(nmi, clabs, 0L) 1272 if(any(m == 0L)) 1273 stop("names do not match previous names") 1274 m 1275 } else stop("names do not match previous names") 1276 } 1277 allargs <- list(...) 1278 allargs <- allargs[lengths(allargs) > 0L] 1279 if(length(allargs)) { 1280 ## drop any zero-row data frames, as they may not have proper column 1281 ## types (e.g. NULL). 1282 nr <- vapply(allargs, function(x) 1283 if(is.data.frame(x)) .row_names_info(x, 2L) 1284 else if(is.list(x)) length(x[[1L]]) 1285 # mismatched lists are checked later 1286 else length(x), 1L) 1287 if(any(n0 <- nr == 0L)) { 1288 if(all(n0)) return(allargs[[1L]]) # pretty arbitrary 1289 allargs <- allargs[!n0] 1290 } 1291 } 1292 n <- length(allargs) 1293 if(n == 0L) 1294 return(list2DF()) 1295 nms <- names(allargs) 1296 if(is.null(nms)) 1297 nms <- character(n) 1298 cl <- NULL 1299 perm <- rows <- vector("list", n) 1300 if(make.row.names) { 1301 rlabs <- rows 1302 autoRnms <- TRUE # result with 1:nrow(.) row names? [efficiency!] 1303 Make.row.names <- function(nmi, ri, ni, nrow) 1304 { 1305 if(nzchar(nmi)) { 1306 if(autoRnms) autoRnms <<- FALSE 1307 if(ni == 0L) character() # PR#8506 1308 else if(ni > 1L) paste(nmi, ri, sep = ".") 1309 else nmi 1310 } 1311 else if(autoRnms && nrow > 0L && identical(ri, seq_len(ni))) 1312 as.integer(seq.int(from = nrow + 1L, length.out = ni)) 1313 else { 1314 if(autoRnms && (nrow > 0L || !identical(ri, seq_len(ni)))) 1315 autoRnms <<- FALSE 1316 ri 1317 } 1318 } 1319 } 1320 smartX <- isTRUE(factor.exclude) 1321 1322 ## check the arguments, develop row and column labels 1323 nrow <- 0L 1324 value <- clabs <- NULL 1325 all.levs <- list() 1326 for(i in seq_len(n)) { ## check and treat arg [[ i ]] -- part 1 1327 xi <- allargs[[i]] 1328 nmi <- nms[i] 1329 ## coerce matrix to data frame 1330 if(is.matrix(xi)) allargs[[i]] <- xi <- 1331 as.data.frame(xi, stringsAsFactors = stringsAsFactors) 1332 if(inherits(xi, "data.frame")) { 1333 if(is.null(cl)) 1334 cl <- oldClass(xi) 1335 ri <- attr(xi, "row.names") 1336 ni <- length(ri) 1337 if(is.null(clabs)) ## first time 1338 clabs <- names(xi) 1339 else { 1340 if(length(xi) != length(clabs)) 1341 stop("numbers of columns of arguments do not match") 1342 pi <- match.names(clabs, names(xi)) 1343 if( !is.null(pi) ) perm[[i]] <- pi 1344 } 1345 rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni) 1346 if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow) 1347 nrow <- nrow + ni 1348 if(is.null(value)) { ## first time ==> setup once: 1349 value <- unclass(xi) 1350 nvar <- length(value) 1351 all.levs <- vector("list", nvar) 1352 has.dim <- facCol <- ordCol <- logical(nvar) 1353 if(smartX) NA.lev <- ordCol 1354 for(j in seq_len(nvar)) { 1355 xj <- value[[j]] 1356 facCol[j] <- fac <- 1357 if(!is.null(lj <- levels(xj))) { 1358 all.levs[[j]] <- lj 1359 TRUE # turn categories into factors 1360 } else 1361 is.factor(xj) 1362 if(fac) { 1363 ordCol[j] <- is.ordered(xj) 1364 if(smartX && !NA.lev[j]) 1365 NA.lev[j] <- anyNA(lj) 1366 } 1367 has.dim[j] <- length(dim(xj)) == 2L 1368 } 1369 } 1370 else for(j in seq_len(nvar)) { 1371 xij <- xi[[j]] 1372 if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j 1373 if(facCol[jj]) { 1374 if(length(lij <- levels(xij))) { 1375 all.levs[[jj]] <- unique(c(all.levs[[jj]], lij)) 1376 if(ordCol[jj]) 1377 ordCol[jj] <- is.ordered(xij) 1378 if(smartX && !NA.lev[jj]) 1379 NA.lev[jj] <- anyNA(lij) 1380 } else if(is.character(xij)) 1381 all.levs[[jj]] <- unique(c(all.levs[[jj]], xij)) 1382 } 1383 } 1384 } ## end{data.frame} 1385 else if(is.list(xi)) { 1386 ni <- range(lengths(xi)) 1387 if(ni[1L] == ni[2L]) 1388 ni <- ni[1L] 1389 else stop("invalid list argument: all variables should have the same length") 1390 ri <- seq_len(ni) 1391 rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni) 1392 if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow) 1393 nrow <- nrow + ni 1394 if(length(nmi <- names(xi)) > 0L) { 1395 if(is.null(clabs)) 1396 clabs <- nmi 1397 else { 1398 if(length(xi) != length(clabs)) 1399 stop("numbers of columns of arguments do not match") 1400 pi <- match.names(clabs, nmi) 1401 if( !is.null(pi) ) perm[[i]] <- pi 1402 } 1403 } 1404 } 1405 else if(length(xi)) { # 1 new row 1406 rows[[i]] <- nrow <- nrow + 1L 1407 if(make.row.names) 1408 rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow) 1409 } 1410 } # for(i .) 1411 1412 nvar <- length(clabs) 1413 if(nvar == 0L) 1414 nvar <- max(lengths(allargs)) # only vector args 1415 if(nvar == 0L) 1416 return(list2DF()) 1417 pseq <- seq_len(nvar) 1418 if(is.null(value)) { # this happens if there has been no data frame 1419 value <- list() 1420 value[pseq] <- list(logical(nrow)) # OK for coercion except to raw. 1421 all.levs <- vector("list", nvar) 1422 has.dim <- facCol <- ordCol <- logical(nvar) 1423 if(smartX) NA.lev <- ordCol 1424 } 1425 names(value) <- clabs 1426 for(j in pseq) 1427 if(length(lij <- all.levs[[j]])) 1428 value[[j]] <- 1429 factor(as.vector(value[[j]]), levels = lij, 1430 exclude = if(smartX) { 1431 if(!NA.lev[j]) NA # else NULL 1432 } else factor.exclude, 1433 ordered = ordCol[j]) 1434 1435 if(any(has.dim)) { # some col's are matrices or d.frame's 1436 jdim <- pseq[has.dim] 1437 if(!all(df <- vapply(jdim, function(j) inherits(value[[j]],"data.frame"), NA))) { 1438 ## Ensure matrix columns can be filled in for(i ...) below 1439 rmax <- max(unlist(rows)) 1440 for(j in jdim[!df]) { 1441 dn <- dimnames(vj <- value[[j]]) 1442 rn <- dn[[1L]] 1443 if(length(rn) > 0L) length(rn) <- rmax 1444 pj <- dim(vj)[2L] 1445 length(vj) <- rmax * pj 1446 value[[j]] <- array(vj, c(rmax, pj), list(rn, dn[[2L]])) 1447 } 1448 } 1449 } 1450 1451 for(i in seq_len(n)) { ## add arg [[i]] to result 1452 xi <- unclass(allargs[[i]]) 1453 if(!is.list(xi)) 1454 if(length(xi) != nvar) 1455 xi <- rep(xi, length.out = nvar) 1456 ri <- rows[[i]] 1457 pi <- perm[[i]] 1458 if(is.null(pi)) pi <- pseq 1459 for(j in pseq) { 1460 jj <- pi[j] 1461 xij <- xi[[j]] 1462 if(has.dim[jj]) { 1463 value[[jj]][ri, ] <- xij 1464 ## copy rownames 1465 if(!is.null(r <- rownames(xij)) && 1466 !(inherits(xij, "data.frame") && 1467 .row_names_info(xij) <= 0)) 1468 rownames(value[[jj]])[ri] <- r 1469 } else { 1470 ## coerce factors to vectors, in case lhs is character or 1471 ## level set has changed 1472 value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij 1473 ## copy names if any 1474 if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm 1475 } 1476 } 1477 } 1478 rlabs <- if(make.row.names && !autoRnms) { 1479 rlabs <- unlist(rlabs) 1480 if(anyDuplicated(rlabs)) 1481 make.unique(as.character(rlabs), sep = "") 1482 else 1483 rlabs 1484 } # else NULL 1485 if(is.null(cl)) { 1486 as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE, 1487 stringsAsFactors = stringsAsFactors) 1488 } else { 1489 structure(value, class = cl, 1490 row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs) 1491 } 1492} 1493 1494 1495### coercion and print methods 1496 1497print.data.frame <- 1498 function(x, ..., digits = NULL, quote = FALSE, right = TRUE, 1499 row.names = TRUE, max = NULL) 1500{ 1501 n <- length(row.names(x)) 1502 if(length(x) == 0L) { 1503 cat(sprintf(ngettext(n, "data frame with 0 columns and %d row", 1504 "data frame with 0 columns and %d rows"), 1505 n), "\n", sep = "") 1506 } else if(n == 0L) { 1507 ## FIXME: header format is inconsistent here 1508 print.default(names(x), quote = FALSE) 1509 cat(gettext("<0 rows> (or 0-length row.names)\n")) 1510 } else { 1511 if(is.null(max)) max <- getOption("max.print", 99999L) 1512 if(!is.finite(max)) stop("invalid 'max' / getOption(\"max.print\"): ", max) 1513 ## format.<*>() : avoiding picking up e.g. format.AsIs 1514 omit <- (n0 <- max %/% length(x)) < n 1515 m <- as.matrix( 1516 format.data.frame(if(omit) x[seq_len(n0), , drop=FALSE] else x, 1517 digits = digits, na.encode = FALSE)) 1518 if(!isTRUE(row.names)) 1519 dimnames(m)[[1L]] <- 1520 if(isFALSE(row.names)) rep.int("", if(omit) n0 else n) 1521 else row.names 1522 print(m, ..., quote = quote, right = right, max = max) 1523 if(omit) 1524 cat(" [ reached 'max' / getOption(\"max.print\") -- omitted", 1525 n - n0, "rows ]\n") 1526 } 1527 invisible(x) 1528} 1529 1530as.matrix.data.frame <- function (x, rownames.force = NA, ...) 1531{ 1532 dm <- dim(x) 1533 rn <- if(rownames.force %in% FALSE) NULL 1534 else if(rownames.force %in% TRUE || .row_names_info(x) > 0L) 1535 row.names(x) # else NULL 1536 dn <- list(rn, names(x)) 1537 if(any(dm == 0L)) 1538 return(array(NA, dim = dm, dimnames = dn)) 1539 p <- dm[2L] # >= 1 1540 pseq <- seq_len(p) 1541 n <- dm[1L] 1542 X <- unclass(x) # will contain the result; 1543 ## the "big question" is if we return a numeric or a character matrix 1544 non.numeric <- non.atomic <- FALSE 1545 all.logical <- TRUE 1546 for (j in pseq) { 1547 xj <- X[[j]] 1548 if(inherits(xj, "data.frame"))# && ncol(xj) > 1L) 1549 X[[j]] <- xj <- as.matrix(xj) 1550 j.logic <- is.logical(xj) 1551 if(all.logical && !j.logic) all.logical <- FALSE 1552 if(length(levels(xj)) > 0L || !(j.logic || is.numeric(xj) || is.complex(xj)) 1553 || (!is.null(cl <- attr(xj, "class")) && # numeric classed objects to format: 1554 any(cl %in% c("Date", "POSIXct", "POSIXlt")))) 1555 non.numeric <- TRUE 1556 if(!is.atomic(xj) && !inherits(xj, "POSIXlt")) 1557 non.atomic <- TRUE 1558 } 1559 if(non.atomic) { 1560 for (j in pseq) { 1561 xj <- X[[j]] 1562 if(!is.recursive(xj)) 1563 X[[j]] <- as.list(as.vector(xj)) 1564 } 1565 } else if(all.logical) { 1566 ## do nothing for logical columns if a logical matrix will result. 1567 } else if(non.numeric) { 1568 for (j in pseq) { 1569 if (is.character(X[[j]])) 1570 next 1571 else if(is.logical(xj <- X[[j]])) 1572 xj <- as.character(xj) # not format(), takes care of NAs too 1573 else { 1574 miss <- is.na(xj) 1575 xj <- if(length(levels(xj))) as.vector(xj) else format(xj) 1576 is.na(xj) <- miss 1577 } 1578 X[[j]] <- xj 1579 } 1580 } 1581 ## These coercions could have changed the number of columns 1582 ## (e.g. class "Surv" coerced to character), 1583 ## so only now can we compute collabs. 1584 collabs <- as.list(dn[[2L]]) 1585 for (j in pseq) { 1586 xj <- X[[j]] 1587 dj <- dim(xj) 1588 if(length(dj) == 2L && dj[2L] > 0L) { # matrix with > 0 col 1589 if(!length(dnj <- colnames(xj))) dnj <- seq_len(dj[2L]) 1590 collabs[[j]] <- 1591 if(length(collabs)) { 1592 if(dj[2L] > 1L) 1593 paste(collabs[[j]], dnj, sep = ".") 1594 else if(is.character(collabs[[j]])) collabs[[j]] 1595 else dnj 1596 } 1597 else dnj 1598 } 1599 } 1600 nc <- vapply(X, NCOL, numeric(1), USE.NAMES=FALSE) 1601 X <- unlist(X, recursive = FALSE, use.names = FALSE) 1602 dim(X) <- c(n, length(X)/n) 1603 dimnames(X) <- list(dn[[1L]], unlist(collabs[nc > 0], use.names = FALSE)) 1604 X 1605} 1606 1607Math.data.frame <- function (x, ...) 1608{ 1609 mode.ok <- vapply(x, function(x) 1610 is.numeric(x) || is.logical(x) || is.complex(x), NA) 1611 if (all(mode.ok)) { 1612 x[] <- lapply(X = x, FUN = .Generic, ...) 1613 return(x) 1614 } else { 1615 vnames <- names(x) 1616 if (is.null(vnames)) vnames <- seq_along(x) 1617 stop("non-numeric-alike variable(s) in data frame: ", 1618 paste(vnames[!mode.ok], collapse = ", ")) 1619 } 1620} 1621 1622Ops.data.frame <- function(e1, e2 = NULL) 1623{ 1624 unary <- nargs() == 1L 1625 lclass <- nzchar(.Method[1L]) 1626 rclass <- !unary && (nzchar(.Method[2L])) 1627 value <- list() 1628 rn <- NULL 1629 ## set up call as op(left, right) 1630 ## These are used, despite 1631 ## _R_CHECK_CODETOOLS_PROFILE_="suppressLocalUnused=FALSE" 1632 FUN <- get(.Generic, envir = parent.frame(), mode = "function") 1633 f <- if (unary) quote(FUN(left)) else quote(FUN(left, right)) 1634 lscalar <- rscalar <- FALSE 1635 if(lclass && rclass) { 1636 nr <- .row_names_info(e1, 2L) 1637 if(.row_names_info(e1) > 0L) rn <- attr(e1, "row.names") 1638 cn <- names(e1) 1639 if(any(dim(e2) != dim(e1))) 1640 stop(gettextf("%s only defined for equally-sized data frames", 1641 sQuote(.Generic)), domain = NA) 1642 } else if(lclass) { 1643 ## e2 is not a data frame, but e1 is. 1644 nr <- .row_names_info(e1, 2L) 1645 if(.row_names_info(e1) > 0L) rn <- attr(e1, "row.names") 1646 cn <- names(e1) 1647 rscalar <- length(e2) <= 1L # e2 might be null 1648 if(is.list(e2)) { 1649 if(rscalar) e2 <- e2[[1L]] 1650 else if(length(e2) != ncol(e1)) 1651 stop(gettextf("list of length %d not meaningful", length(e2)), 1652 domain = NA) 1653 } else { 1654 if(!rscalar) 1655 e2 <- split(rep_len(as.vector(e2), prod(dim(e1))), 1656 rep.int(seq_len(ncol(e1)), 1657 rep.int(nrow(e1), ncol(e1)))) 1658 } 1659 } else { 1660 ## e1 is not a data frame, but e2 is. 1661 nr <- .row_names_info(e2, 2L) 1662 if(.row_names_info(e2) > 0L) rn <- attr(e2, "row.names") 1663 cn <- names(e2) 1664 lscalar <- length(e1) <= 1L 1665 if(is.list(e1)) { 1666 if(lscalar) e1 <- e1[[1L]] 1667 else if(length(e1) != ncol(e2)) 1668 stop(gettextf("list of length %d not meaningful", length(e1)), 1669 domain = NA) 1670 } else { 1671 if(!lscalar) 1672 e1 <- split(rep_len(as.vector(e1), prod(dim(e2))), 1673 rep.int(seq_len(ncol(e2)), 1674 rep.int(nrow(e2), ncol(e2)))) 1675 } 1676 } 1677 for(j in seq_along(cn)) { 1678 left <- if(!lscalar) e1[[j]] else e1 1679 right <- if(!rscalar) e2[[j]] else e2 1680 value[[j]] <- eval(f) 1681 } 1682 if(.Generic %in% c("+","-","*","^","%%","%/%","/")) {## == 'Arith' 1683 if(length(value)) { 1684 names(value) <- cn 1685 data.frame(value, row.names = rn, check.names = FALSE) 1686 } else 1687 data.frame( row.names = rn, check.names = FALSE) 1688 } 1689 else { ## 'Logic' ("&","|") and 'Compare' ("==",">","<","!=","<=",">=") : 1690 value <- unlist(value, recursive = FALSE, use.names = FALSE) 1691 if(!length(value)) 1692 matrix(logical(), nrow = nr, ncol = length(cn), dimnames = list(rn,cn)) 1693 else # nrow + possibly recycled value determine dim: 1694 matrix(value, nrow = nr, dimnames = list(rn,cn)) 1695 } 1696} 1697 1698Summary.data.frame <- function(..., na.rm) 1699{ 1700 args <- list(...) 1701 args <- lapply(args, function(x) { 1702 x <- as.matrix(x) 1703 if(!is.numeric(x) && !is.logical(x) && !is.complex(x)) 1704 stop("only defined on a data frame with all numeric-alike variables") 1705 x 1706 }) 1707 do.call(.Generic, c(args, na.rm=na.rm)) 1708} 1709 1710xtfrm.data.frame <- function(x) { 1711 if(tolower(Sys.getenv("_R_STOP_ON_XTFRM_DATA_FRAME_")) %in% 1712 c("1", "yes", "true")) 1713 stop("cannot xtfrm data frames") 1714 else { 1715 warning("cannot xtfrm data frames") 1716 NextMethod("xtfrm") 1717 } 1718} 1719 1720list2DF <- 1721function(x = list(), nrow = NULL) 1722{ 1723 stopifnot(is.list(x), is.null(nrow) || nrow >= 0L) 1724 if(n <- length(x)) { 1725 if(is.null(nrow)) 1726 nrow <- max(lengths(x), 0L) 1727 x <- lapply(x, rep_len, nrow) 1728 } else { 1729 if(is.null(nrow)) 1730 nrow <- 0L 1731 } 1732 if(is.null(names(x))) 1733 names(x) <- character(n) 1734 class(x) <- "data.frame" 1735 attr(x, "row.names") <- .set_row_names(nrow) 1736 x 1737} 1738