1# File src/library/stats/R/lm.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2020 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19 20lm <- function (formula, data, subset, weights, na.action, 21 method = "qr", model = TRUE, x = FALSE, y = FALSE, 22 qr = TRUE, singular.ok = TRUE, contrasts = NULL, 23 offset, ...) 24{ 25 ret.x <- x 26 ret.y <- y 27 cl <- match.call() 28 mf <- match.call(expand.dots = FALSE) 29 m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), 30 names(mf), 0L) 31 mf <- mf[c(1L, m)] 32 mf$drop.unused.levels <- TRUE 33 ## need stats:: for non-standard evaluation 34 mf[[1L]] <- quote(stats::model.frame) 35 mf <- eval(mf, parent.frame()) 36 if (method == "model.frame") 37 return(mf) 38 else if (method != "qr") 39 warning(gettextf("method = '%s' is not supported. Using 'qr'", method), 40 domain = NA) 41 mt <- attr(mf, "terms") # allow model.frame to update it 42 y <- model.response(mf, "numeric") 43 ## avoid any problems with 1D or nx1 arrays by as.vector. 44 w <- as.vector(model.weights(mf)) 45 if(!is.null(w) && !is.numeric(w)) 46 stop("'weights' must be a numeric vector") 47 offset <- model.offset(mf) 48 mlm <- is.matrix(y) 49 ny <- if(mlm) nrow(y) else length(y) 50 if(!is.null(offset)) { 51 if(!mlm) offset <- as.vector(offset) 52 if(NROW(offset) != ny) 53 stop(gettextf("number of offsets is %d, should equal %d (number of observations)", 54 NROW(offset), ny), domain = NA) 55 } 56 57 if (is.empty.model(mt)) { 58 x <- NULL 59 z <- list(coefficients = if(mlm) matrix(NA_real_, 0, ncol(y)) 60 else numeric(), 61 residuals = y, 62 fitted.values = 0 * y, weights = w, rank = 0L, 63 df.residual = if(!is.null(w)) sum(w != 0) else ny) 64 if(!is.null(offset)) { 65 z$fitted.values <- offset 66 z$residuals <- y - offset 67 } 68 } 69 else { 70 x <- model.matrix(mt, mf, contrasts) 71 z <- if(is.null(w)) lm.fit(x, y, offset = offset, 72 singular.ok=singular.ok, ...) 73 else lm.wfit(x, y, w, offset = offset, singular.ok=singular.ok, ...) 74 } 75 class(z) <- c(if(mlm) "mlm", "lm") 76 z$na.action <- attr(mf, "na.action") 77 z$offset <- offset 78 z$contrasts <- attr(x, "contrasts") 79 z$xlevels <- .getXlevels(mt, mf) 80 z$call <- cl 81 z$terms <- mt 82 if (model) 83 z$model <- mf 84 if (ret.x) 85 z$x <- x 86 if (ret.y) 87 z$y <- y 88 if (!qr) z$qr <- NULL 89 z 90} 91 92## lm.fit() and lm.wfit() have *MUCH* in common [say ``code re-use !''] 93lm.fit <- function (x, y, offset = NULL, method = "qr", tol = 1e-07, 94 singular.ok = TRUE, ...) 95{ 96 if (is.null(n <- nrow(x))) stop("'x' must be a matrix") 97 if(n == 0L) stop("0 (non-NA) cases") 98 p <- ncol(x) 99 if (p == 0L) { 100 ## oops, null model 101 return(list(coefficients = numeric(), residuals = y, 102 fitted.values = 0 * y, rank = 0, 103 df.residual = length(y))) 104 } 105 ny <- NCOL(y) 106 ## treat one-col matrix as vector 107 if(is.matrix(y) && ny == 1) 108 y <- drop(y) 109 if(!is.null(offset)) 110 y <- y - offset 111 if (NROW(y) != n) 112 stop("incompatible dimensions") 113 if(method != "qr") 114 warning(gettextf("method = '%s' is not supported. Using 'qr'", method), 115 domain = NA) 116 chkDots(...) 117 z <- .Call(C_Cdqrls, x, y, tol, FALSE) 118 if(!singular.ok && z$rank < p) stop("singular fit encountered") 119 coef <- z$coefficients 120 pivot <- z$pivot 121 ## careful here: the rank might be 0 122 r1 <- seq_len(z$rank) 123 dn <- colnames(x) %||% paste0("x", 1L:p) 124 nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank)) 125 r2 <- if(z$rank < p) (z$rank+1L):p else integer() 126 if (is.matrix(y)) { 127 coef[r2, ] <- NA 128 if(z$pivoted) coef[pivot, ] <- coef 129 dimnames(coef) <- list(dn, colnames(y)) 130 dimnames(z$effects) <- list(nmeffects, colnames(y)) 131 } else { 132 coef[r2] <- NA 133 ## avoid copy 134 if(z$pivoted) coef[pivot] <- coef 135 names(coef) <- dn 136 names(z$effects) <- nmeffects 137 } 138 z$coefficients <- coef 139 r1 <- y - z$residuals ; if(!is.null(offset)) r1 <- r1 + offset 140 ## avoid unnecessary copy 141 if(z$pivoted) colnames(z$qr) <- colnames(x)[z$pivot] 142 qr <- z[c("qr", "qraux", "pivot", "tol", "rank")] 143 c(z[c("coefficients", "residuals", "effects", "rank")], 144 list(fitted.values = r1, assign = attr(x, "assign"), 145 qr = structure(qr, class="qr"), 146 df.residual = n - z$rank)) 147} 148 149.lm.fit <- function(x, y, tol = 1e-07) .Call(C_Cdqrls, x, y, tol, check=TRUE) 150 151lm.wfit <- function (x, y, w, offset = NULL, method = "qr", tol = 1e-7, 152 singular.ok = TRUE, ...) 153{ 154 if(is.null(n <- nrow(x))) stop("'x' must be a matrix") 155 if(n == 0) stop("0 (non-NA) cases") 156 ny <- NCOL(y) 157 ## treat one-col matrix as vector 158 if(is.matrix(y) && ny == 1L) 159 y <- drop(y) 160 if(!is.null(offset)) 161 y <- y - offset 162 if (NROW(y) != n || length(w) != n) 163 stop("incompatible dimensions") 164 if (any(w < 0 | is.na(w))) 165 stop("missing or negative weights not allowed") 166 if(method != "qr") 167 warning(gettextf("method = '%s' is not supported. Using 'qr'", method), 168 domain = NA) 169 chkDots(...) 170 x.asgn <- attr(x, "assign")# save 171 zero.weights <- any(w == 0) 172 if (zero.weights) { 173 save.r <- y 174 save.f <- y 175 save.w <- w 176 ok <- w != 0 177 nok <- !ok 178 w <- w[ok] 179 x0 <- x[!ok, , drop = FALSE] 180 x <- x[ok, , drop = FALSE] 181 n <- nrow(x) 182 y0 <- if (ny > 1L) y[!ok, , drop = FALSE] else y[!ok] 183 y <- if (ny > 1L) y[ ok, , drop = FALSE] else y[ok] 184 } 185 p <- ncol(x) 186 if (p == 0) { 187 ## oops, null model 188 return(list(coefficients = numeric(), residuals = y, 189 fitted.values = 0 * y, weights = w, rank = 0L, 190 df.residual = length(y))) 191 } 192 if (n == 0) { # all cases have weight zero 193 return(list(coefficients = rep(NA_real_, p), residuals = y, 194 fitted.values = 0 * y, weights = w, rank = 0L, 195 df.residual = 0L)) 196 } 197 wts <- sqrt(w) 198 z <- .Call(C_Cdqrls, x * wts, y * wts, tol, FALSE) 199 if(!singular.ok && z$rank < p) stop("singular fit encountered") 200 coef <- z$coefficients 201 pivot <- z$pivot 202 r1 <- seq_len(z$rank) 203 dn <- colnames(x) %||% paste0("x", 1L:p) 204 nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank)) 205 r2 <- if(z$rank < p) (z$rank+1L):p else integer() 206 if (is.matrix(y)) { 207 coef[r2, ] <- NA 208 if(z$pivoted) coef[pivot, ] <- coef 209 dimnames(coef) <- list(dn, colnames(y)) 210 dimnames(z$effects) <- list(nmeffects,colnames(y)) 211 } else { 212 coef[r2] <- NA 213 if(z$pivoted) coef[pivot] <- coef 214 names(coef) <- dn 215 names(z$effects) <- nmeffects 216 } 217 z$coefficients <- coef 218 z$residuals <- z$residuals/wts 219 z$fitted.values <- y - z$residuals 220 z$weights <- w 221 if (zero.weights) { 222 coef[is.na(coef)] <- 0 223 f0 <- x0 %*% coef 224 if (ny > 1) { 225 save.r[ok, ] <- z$residuals 226 save.r[nok, ] <- y0 - f0 227 save.f[ok, ] <- z$fitted.values 228 save.f[nok, ] <- f0 229 } 230 else { 231 save.r[ok] <- z$residuals 232 save.r[nok] <- y0 - f0 233 save.f[ok] <- z$fitted.values 234 save.f[nok] <- f0 235 } 236 z$residuals <- save.r 237 z$fitted.values <- save.f 238 z$weights <- save.w 239 } 240 if(!is.null(offset)) 241 z$fitted.values <- z$fitted.values + offset 242 if(z$pivoted) colnames(z$qr) <- colnames(x)[z$pivot] 243 qr <- z[c("qr", "qraux", "pivot", "tol", "rank")] 244 c(z[c("coefficients", "residuals", "fitted.values", "effects", 245 "weights", "rank")], 246 list(assign = x.asgn, 247 qr = structure(qr, class="qr"), 248 df.residual = n - z$rank)) 249} 250 251print.lm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) 252{ 253 cat("\nCall:\n", 254 paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") 255 if(length(coef(x))) { 256 cat("Coefficients:\n") 257 print.default(format(coef(x), digits = digits), 258 print.gap = 2L, quote = FALSE) 259 } else cat("No coefficients\n") 260 cat("\n") 261 invisible(x) 262} 263 264summary.lm <- function (object, correlation = FALSE, symbolic.cor = FALSE, ...) 265{ 266 z <- object 267 p <- z$rank 268 rdf <- z$df.residual 269 if (p == 0) { 270 r <- z$residuals 271 n <- length(r) 272 w <- z$weights 273 if (is.null(w)) { 274 rss <- sum(r^2) 275 } else { 276 rss <- sum(w * r^2) 277 r <- sqrt(w) * r 278 } 279 resvar <- rss/rdf 280 ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")] 281 class(ans) <- "summary.lm" 282 ans$aliased <- is.na(coef(object)) # used in print method 283 ans$residuals <- r 284 ans$df <- c(0L, n, length(ans$aliased)) 285 ans$coefficients <- matrix(NA_real_, 0L, 4L, dimnames = 286 list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))) 287 ans$sigma <- sqrt(resvar) 288 ans$r.squared <- ans$adj.r.squared <- 0 289 ans$cov.unscaled <- matrix(NA_real_, 0L, 0L) 290 if (correlation) ans$correlation <- ans$cov.unscaled 291 return(ans) 292 } 293 if (is.null(z$terms)) 294 stop("invalid 'lm' object: no 'terms' component") 295 if(!inherits(object, "lm")) 296 warning("calling summary.lm(<fake-lm-object>) ...") 297 Qr <- qr.lm(object) 298 n <- NROW(Qr$qr) 299 if(is.na(z$df.residual) || n - p != z$df.residual) 300 warning("residual degrees of freedom in object suggest this is not an \"lm\" fit") 301 ## do not want missing values substituted here 302 r <- z$residuals 303 f <- z$fitted.values 304 w <- z$weights 305 if (is.null(w)) { 306 mss <- if (attr(z$terms, "intercept")) 307 sum((f - mean(f))^2) else sum(f^2) 308 rss <- sum(r^2) 309 } else { 310 mss <- if (attr(z$terms, "intercept")) { 311 m <- sum(w * f /sum(w)) 312 sum(w * (f - m)^2) 313 } else sum(w * f^2) 314 rss <- sum(w * r^2) 315 r <- sqrt(w) * r 316 } 317 resvar <- rss/rdf 318 ## see thread at https://stat.ethz.ch/pipermail/r-help/2014-March/367585.html 319 if (is.finite(resvar) && 320 resvar < (mean(f)^2 + var(c(f))) * 1e-30) # a few times .Machine$double.eps^2 321 warning("essentially perfect fit: summary may be unreliable") 322 p1 <- 1L:p 323 R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) 324 se <- sqrt(diag(R) * resvar) 325 est <- z$coefficients[Qr$pivot[p1]] 326 tval <- est/se 327 ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")] 328 ans$residuals <- r 329 ans$coefficients <- 330 cbind(Estimate = est, "Std. Error" = se, "t value" = tval, 331 "Pr(>|t|)" = 2*pt(abs(tval), rdf, lower.tail = FALSE)) 332 ans$aliased <- is.na(z$coefficients) # used in print method 333 ans$sigma <- sqrt(resvar) 334 ans$df <- c(p, rdf, NCOL(Qr$qr)) 335 if (p != attr(z$terms, "intercept")) { 336 df.int <- if (attr(z$terms, "intercept")) 1L else 0L 337 ans$r.squared <- mss/(mss + rss) 338 ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - df.int)/rdf) 339 ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, 340 numdf = p - df.int, dendf = rdf) 341 } else ans$r.squared <- ans$adj.r.squared <- 0 342 ans$cov.unscaled <- R 343 dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,1)] 344 if (correlation) { 345 ans$correlation <- (R * resvar)/outer(se, se) 346 dimnames(ans$correlation) <- dimnames(ans$cov.unscaled) 347 ans$symbolic.cor <- symbolic.cor 348 } 349 if(!is.null(z$na.action)) ans$na.action <- z$na.action 350 class(ans) <- "summary.lm" 351 ans 352} 353 354print.summary.lm <- 355 function (x, digits = max(3L, getOption("digits") - 3L), 356 symbolic.cor = x$symbolic.cor, 357 signif.stars = getOption("show.signif.stars"), ...) 358{ 359 cat("\nCall:\n", # S has ' ' instead of '\n' 360 paste(deparse(x$call), sep="\n", collapse = "\n"), "\n\n", sep = "") 361 resid <- x$residuals 362 df <- x$df 363 rdf <- df[2L] 364 cat(if(!is.null(x$weights) && diff(range(x$weights))) "Weighted ", 365 "Residuals:\n", sep = "") 366 if (rdf > 5L) { 367 nam <- c("Min", "1Q", "Median", "3Q", "Max") 368 rq <- if (length(dim(resid)) == 2L) 369 structure(apply(t(resid), 1L, quantile), 370 dimnames = list(nam, dimnames(resid)[[2L]])) 371 else { 372 zz <- zapsmall(quantile(resid), digits + 1L) 373 structure(zz, names = nam) 374 } 375 print(rq, digits = digits, ...) 376 } 377 else if (rdf > 0L) { 378 print(resid, digits = digits, ...) 379 } else { # rdf == 0 : perfect fit! 380 cat("ALL", df[1L], "residuals are 0: no residual degrees of freedom!") 381 cat("\n") 382 } 383 if (length(x$aliased) == 0L) { 384 cat("\nNo Coefficients\n") 385 } else { 386 if (nsingular <- df[3L] - df[1L]) 387 cat("\nCoefficients: (", nsingular, 388 " not defined because of singularities)\n", sep = "") 389 else cat("\nCoefficients:\n") 390 coefs <- x$coefficients 391 if(any(aliased <- x$aliased)) { 392 cn <- names(aliased) 393 coefs <- matrix(NA, length(aliased), 4, dimnames=list(cn, colnames(coefs))) 394 coefs[!aliased, ] <- x$coefficients 395 } 396 397 printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 398 na.print = "NA", ...) 399 } 400 ## 401 cat("\nResidual standard error:", 402 format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom") 403 cat("\n") 404 if(nzchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep = "") 405 if (!is.null(x$fstatistic)) { 406 cat("Multiple R-squared: ", formatC(x$r.squared, digits = digits)) 407 cat(",\tAdjusted R-squared: ",formatC(x$adj.r.squared, digits = digits), 408 "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits), 409 "on", x$fstatistic[2L], "and", 410 x$fstatistic[3L], "DF, p-value:", 411 format.pval(pf(x$fstatistic[1L], x$fstatistic[2L], 412 x$fstatistic[3L], lower.tail = FALSE), 413 digits = digits)) 414 cat("\n") 415 } 416 correl <- x$correlation 417 if (!is.null(correl)) { 418 p <- NCOL(correl) 419 if (p > 1L) { 420 cat("\nCorrelation of Coefficients:\n") 421 if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects 422 print(symnum(correl, abbr.colnames = NULL)) 423 } else { 424 correl <- format(round(correl, 2), nsmall = 2, digits = digits) 425 correl[!lower.tri(correl)] <- "" 426 print(correl[-1, -p, drop=FALSE], quote = FALSE) 427 } 428 } 429 } 430 cat("\n")#- not in S 431 invisible(x) 432} 433 434residuals.lm <- 435 function(object, 436 type = c("working","response", "deviance","pearson", "partial"), 437 ...) 438{ 439 type <- match.arg(type) 440 r <- object$residuals 441 res <- switch(type, 442 working =, response = r, 443 deviance=, pearson = 444 if(is.null(object$weights)) r else r * sqrt(object$weights), 445 partial = r 446 ) 447 res <- naresid(object$na.action, res) 448 if (type=="partial") ## predict already does naresid 449 res <- res + predict(object,type="terms") 450 res 451} 452 453## using qr(<lm>) as interface to <lm>$qr : 454qr.lm <- function(x, ...) { 455 x$qr %||% 456 stop("lm object does not have a proper 'qr' component. 457 Rank zero or should not have used lm(.., qr=FALSE).") 458} 459 460## The lm method includes objects of class "glm" 461simulate.lm <- function(object, nsim = 1, seed = NULL, ...) 462{ 463 if(!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) 464 runif(1) # initialize the RNG if necessary 465 if(is.null(seed)) 466 RNGstate <- get(".Random.seed", envir = .GlobalEnv) 467 else { 468 R.seed <- get(".Random.seed", envir = .GlobalEnv) 469 set.seed(seed) 470 RNGstate <- structure(seed, kind = as.list(RNGkind())) 471 on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) 472 } 473 fam <- if(isGlm <- inherits(object, "glm")) object$family$family else "gaussian" 474 ftd <- fitted(object) # == napredict(*, object$fitted) 475 isMlm <- identical(fam, "gaussian") && is.matrix(ftd) 476 nm <- if(isMlm) dimnames(ftd) else names(ftd) 477 if(isMlm) ## Not hard. Biggest question: how exactly the data frame should look 478 stop("simulate() is not yet implemented for multivariate lm()") 479 n <- length(ftd) 480 ntot <- n * nsim 481 val <- switch(fam, 482 "gaussian" = { 483 vars <- deviance(object)/ df.residual(object) 484 if(isMlm) { 485 ## _TODO_ 486 ## weights ==> "vars / weights" as matrix with dim(ftd) 487 } else { 488 if(isGlm) { 489 if(!is.null(object$prior.weights)) 490 vars <- vars/object$prior.weights 491 } else # lm() 492 if(!(is.null(w <- object$weights) || 493 (length(w) == 1L && w == 1))) 494 vars <- vars/w 495 ftd + rnorm(ntot, sd = sqrt(vars)) 496 } 497 }, 498 if(!is.null(object$family$simulate)) 499 object$family$simulate(object, nsim) 500 else stop(gettextf("family '%s' not implemented", fam), 501 domain = NA) 502 ) 503 504 if(isMlm) { 505 ## _TODO_ 506 } else if(!is.list(val)) { 507 dim(val) <- c(n, nsim) 508 val <- as.data.frame(val) 509 } else 510 class(val) <- "data.frame" 511 ## isMlm: conceptually, each "sim_i" could be a *matrix* [unusually] 512 names(val) <- paste0("sim_", seq_len(nsim)) 513 if (!is.null(nm)) row.names(val) <- nm 514 attr(val, "seed") <- RNGstate 515 val 516} 517 518deviance.lm <- function(object, ...) 519 sum(weighted.residuals(object)^2, na.rm=TRUE) 520 521formula.lm <- function(x, ...) 522{ 523 form <- x$formula 524 if( !is.null(form) ) { 525 form <- formula(x$terms) # has . expanded 526 environment(form) <- environment(x$formula) 527 form 528 } else formula(x$terms) 529} 530 531family.lm <- function(object, ...) { gaussian() } 532 533model.frame.lm <- function(formula, ...) 534{ 535 dots <- list(...) 536 nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0)] 537 if (length(nargs) || is.null(formula$model)) { 538 ## mimic lm(method = "model.frame") 539 fcall <- formula$call 540 m <- match(c("formula", "data", "subset", "weights", "na.action", 541 "offset"), names(fcall), 0L) 542 fcall <- fcall[c(1L, m)] 543 fcall$drop.unused.levels <- TRUE 544 ## need stats:: for non-standard evaluation 545 fcall[[1L]] <- quote(stats::model.frame) 546 fcall$xlev <- formula$xlevels 547 ## We want to copy over attributes here, especially predvars. 548 fcall$formula <- terms(formula) 549 fcall[names(nargs)] <- nargs 550 env <- environment(formula$terms) %||% parent.frame() 551 eval(fcall, env) # 2-arg form as env is an environment 552 } 553 else formula$model 554} 555 556variable.names.lm <- function(object, full = FALSE, ...) 557{ 558 if(full) dimnames(qr.lm(object)$qr)[[2L]] 559 else if(object$rank) dimnames(qr.lm(object)$qr)[[2L]][seq_len(object$rank)] 560 else character() 561} 562 563case.names.lm <- function(object, full = FALSE, ...) 564{ 565 w <- weights(object) 566 dn <- names(residuals(object)) 567 if(full || is.null(w)) dn else dn[w!=0] 568} 569 570anova.lm <- function(object, ...) 571{ 572 ## Do not copy this: anova.lmlist is not an exported object. 573 ## See anova.glm for further comments. 574 if(length(list(object, ...)) > 1L) return(anova.lmlist(object, ...)) 575 576 if(!inherits(object, "lm")) 577 warning("calling anova.lm(<fake-lm-object>) ...") 578 w <- object$weights 579 ssr <- sum(if(is.null(w)) object$residuals^2 else w*object$residuals^2) 580 mss <- sum(if(is.null(w)) object$fitted.values^2 else w*object$fitted.values^2) 581 if(ssr < 1e-10*mss) 582 warning("ANOVA F-tests on an essentially perfect fit are unreliable") 583 dfr <- df.residual(object) 584 p <- object$rank 585 if(p > 0L) { 586 p1 <- 1L:p 587 comp <- object$effects[p1] 588 asgn <- object$assign[qr.lm(object)$pivot][p1] 589 nmeffects <- c("(Intercept)", attr(object$terms, "term.labels")) 590 tlabels <- nmeffects[1 + unique(asgn)] 591 ss <- c(vapply( split(comp^2,asgn), sum, 1.0), ssr) 592 df <- c(lengths(split(asgn, asgn)), dfr) 593 } else { 594 ss <- ssr 595 df <- dfr 596 tlabels <- character() 597 } 598 ms <- ss/df 599 f <- ms/(ssr/dfr) 600 P <- pf(f, df, dfr, lower.tail = FALSE) 601 table <- data.frame(df, ss, ms, f, P) 602 table[length(P), 4:5] <- NA 603 dimnames(table) <- list(c(tlabels, "Residuals"), 604 c("Df","Sum Sq", "Mean Sq", "F value", "Pr(>F)")) 605 if(attr(object$terms,"intercept")) table <- table[-1, ] 606 structure(table, heading = c("Analysis of Variance Table\n", 607 paste("Response:", deparse(formula(object)[[2L]]))), 608 class = c("anova", "data.frame"))# was "tabular" 609} 610 611anova.lmlist <- function (object, ..., scale = 0, test = "F") 612{ 613 objects <- list(object, ...) 614 responses <- as.character(lapply(objects, 615 function(x) deparse(x$terms[[2L]]))) 616 sameresp <- responses == responses[1L] 617 if (!all(sameresp)) { 618 objects <- objects[sameresp] 619 warning(gettextf("models with response %s removed because response differs from model 1", 620 sQuote(deparse(responses[!sameresp]))), 621 domain = NA) 622 } 623 624 ns <- sapply(objects, function(x) length(x$residuals)) 625 if(any(ns != ns[1L])) 626 stop("models were not all fitted to the same size of dataset") 627 628 ## calculate the number of models 629 nmodels <- length(objects) 630 if (nmodels == 1) 631 return(anova.lm(object)) 632 633 ## extract statistics 634 635 resdf <- as.numeric(lapply(objects, df.residual)) 636 resdev <- as.numeric(lapply(objects, deviance)) 637 638 ## construct table and title 639 640 table <- data.frame(resdf, resdev, c(NA, -diff(resdf)), 641 c(NA, -diff(resdev)) ) 642 variables <- lapply(objects, function(x) 643 paste(deparse(formula(x)), collapse="\n") ) 644 dimnames(table) <- list(1L:nmodels, 645 c("Res.Df", "RSS", "Df", "Sum of Sq")) 646 647 title <- "Analysis of Variance Table\n" 648 topnote <- paste0("Model ", format(1L:nmodels), ": ", variables, 649 collapse = "\n") 650 651 ## calculate test statistic if needed 652 653 if(!is.null(test)) { 654 bigmodel <- order(resdf)[1L] 655 scale <- if(scale > 0) scale else resdev[bigmodel]/resdf[bigmodel] 656 table <- stat.anova(table = table, test = test, 657 scale = scale, 658 df.scale = resdf[bigmodel], 659 n = length(objects[[bigmodel]]$residuals)) 660 } 661 structure(table, heading = c(title, topnote), 662 class = c("anova", "data.frame")) 663} 664 665## code originally from John Maindonald 26Jul2000 666predict.lm <- 667 function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf, 668 interval = c("none", "confidence", "prediction"), 669 level = .95, type = c("response", "terms"), 670 terms = NULL, na.action = na.pass, pred.var = res.var/weights, 671 weights = 1, ...) 672{ 673 tt <- terms(object) 674 if(!inherits(object, "lm")) 675 warning("calling predict.lm(<fake-lm-object>) ...") 676 if(missing(newdata) || is.null(newdata)) { 677 mm <- X <- model.matrix(object) 678 mmDone <- TRUE 679 offset <- object$offset 680 } 681 else { 682 Terms <- delete.response(tt) 683 m <- model.frame(Terms, newdata, na.action = na.action, 684 xlev = object$xlevels) 685 if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) 686 X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) 687 offset <- rep(0, nrow(X)) 688 if (!is.null(off.num <- attr(tt, "offset"))) 689 for(i in off.num) 690 offset <- offset + eval(attr(tt, "variables")[[i+1]], newdata) 691 if (!is.null(object$call$offset)) 692 offset <- offset + eval(object$call$offset, newdata) 693 mmDone <- FALSE 694 } 695 n <- length(object$residuals) # NROW(qr(object)$qr) 696 p <- object$rank 697 p1 <- seq_len(p) 698 piv <- if(p) qr.lm(object)$pivot[p1] 699 if(p < ncol(X) && !(missing(newdata) || is.null(newdata))) 700 warning("prediction from a rank-deficient fit may be misleading") 701### NB: Q[p1,] %*% X[,piv] = R[p1,p1] 702 beta <- object$coefficients 703 predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv]) 704 if (!is.null(offset)) 705 predictor <- predictor + offset 706 707 interval <- match.arg(interval) 708 if (interval == "prediction") { 709 if (missing(newdata)) 710 warning("predictions on current data refer to _future_ responses\n") 711 if (missing(newdata) && missing(weights)) { 712 w <- weights.default(object) 713 if (!is.null(w)) { 714 weights <- w 715 warning("assuming prediction variance inversely proportional to weights used for fitting\n") 716 } 717 } 718 if (!missing(newdata) && missing(weights) && !is.null(object$weights) && missing(pred.var)) 719 warning("Assuming constant prediction variance even though model fit is weighted\n") 720 if (inherits(weights, "formula")){ 721 if (length(weights) != 2L) 722 stop("'weights' as formula should be one-sided") 723 d <- if(missing(newdata) || is.null(newdata)) 724 model.frame(object) 725 else 726 newdata 727 weights <- eval(weights[[2L]], d, environment(weights)) 728 } 729 } 730 731 type <- match.arg(type) 732 if(se.fit || interval != "none") { 733 ## w is needed for interval = "confidence" 734 w <- object$weights 735 res.var <- 736 if (is.null(scale)) { 737 r <- object$residuals 738 rss <- sum(if(is.null(w)) r^2 else r^2 * w) 739 df <- object$df.residual 740 rss/df 741 } else scale^2 742 if(type != "terms") { 743 if(p > 0) { 744 XRinv <- 745 if(missing(newdata) && is.null(w)) 746 qr.Q(qr.lm(object))[, p1, drop = FALSE] 747 else 748 X[, piv] %*% qr.solve(qr.R(qr.lm(object))[p1, p1]) 749# NB: 750# qr.Q(qr.lm(object))[, p1, drop = FALSE] / sqrt(w) 751# looks faster than the above, but it's slower, and doesn't handle zero 752# weights properly 753# 754 ip <- drop(XRinv^2 %*% rep(res.var, p)) 755 } else ip <- rep(0, n) 756 } 757 } 758 759 if (type == "terms") { ## type == "terms" ------------ 760 if(!mmDone) { 761 mm <- model.matrix(object) 762 mmDone <- TRUE 763 } 764 aa <- attr(mm, "assign") 765 ll <- attr(tt, "term.labels") 766 hasintercept <- attr(tt, "intercept") > 0L 767 if (hasintercept) ll <- c("(Intercept)", ll) 768 aaa <- factor(aa, labels = ll) 769 asgn <- split(order(aa), aaa) 770 if (hasintercept) { 771 asgn$"(Intercept)" <- NULL 772 avx <- colMeans(mm) 773 termsconst <- sum(avx[piv] * beta[piv]) 774 } 775 nterms <- length(asgn) 776 if(nterms > 0) { 777 predictor <- matrix(ncol = nterms, nrow = NROW(X)) 778 dimnames(predictor) <- list(rownames(X), names(asgn)) 779 780 if (se.fit || interval != "none") { 781 ip <- matrix(ncol = nterms, nrow = NROW(X)) 782 dimnames(ip) <- list(rownames(X), names(asgn)) 783 Rinv <- qr.solve(qr.R(qr.lm(object))[p1, p1]) 784 } 785 if(hasintercept) 786 X <- sweep(X, 2L, avx, check.margin=FALSE) 787 unpiv <- rep.int(0L, NCOL(X)) 788 unpiv[piv] <- p1 789 ## Predicted values will be set to 0 for any term that 790 ## corresponds to columns of the X-matrix that are 791 ## completely aliased with earlier columns. 792 for (i in seq.int(1L, nterms, length.out = nterms)) { 793 iipiv <- asgn[[i]] # Columns of X, ith term 794 ii <- unpiv[iipiv] # Corresponding rows of Rinv 795 iipiv[ii == 0L] <- 0L 796 predictor[, i] <- 797 if(any(iipiv > 0L)) X[, iipiv, drop = FALSE] %*% beta[iipiv] 798 else 0 799 if (se.fit || interval != "none") 800 ip[, i] <- 801 if(any(iipiv > 0L)) 802 as.matrix(X[, iipiv, drop = FALSE] %*% 803 Rinv[ii, , drop = FALSE])^2 %*% rep.int(res.var, p) 804 else 0 805 } 806 if (!is.null(terms)) { 807 predictor <- predictor[, terms, drop = FALSE] 808 if (se.fit) 809 ip <- ip[, terms, drop = FALSE] 810 } 811 } else { # no terms 812 predictor <- ip <- matrix(0, n, 0L) 813 } 814 attr(predictor, 'constant') <- if (hasintercept) termsconst else 0 815 } 816 817### Now construct elements of the list that will be returned 818 819 if(interval != "none") { 820 tfrac <- qt((1 - level)/2, df) 821 hwid <- tfrac * switch(interval, 822 confidence = sqrt(ip), 823 prediction = sqrt(ip+pred.var) 824 ) 825 if(type != "terms") { 826 predictor <- cbind(predictor, predictor + hwid %o% c(1, -1)) 827 colnames(predictor) <- c("fit", "lwr", "upr") 828 } else { 829 if (!is.null(terms)) hwid <- hwid[, terms, drop = FALSE] 830 lwr <- predictor + hwid 831 upr <- predictor - hwid 832 } 833 } 834 if(se.fit || interval != "none") { 835 se <- sqrt(ip) 836 if(type == "terms" && !is.null(terms) && !se.fit) 837 se <- se[, terms, drop = FALSE] 838 } 839 if(missing(newdata) && !is.null(na.act <- object$na.action)) { 840 predictor <- napredict(na.act, predictor) 841 if(se.fit) se <- napredict(na.act, se) 842 } 843 if(type == "terms" && interval != "none") { 844 if(missing(newdata) && !is.null(na.act)) { 845 lwr <- napredict(na.act, lwr) 846 upr <- napredict(na.act, upr) 847 } 848 list(fit = predictor, se.fit = se, lwr = lwr, upr = upr, 849 df = df, residual.scale = sqrt(res.var)) 850 } else if (se.fit) 851 list(fit = predictor, se.fit = se, 852 df = df, residual.scale = sqrt(res.var)) 853 else predictor 854} 855 856effects.lm <- function(object, set.sign = FALSE, ...) 857{ 858 eff <- object$effects 859 if(is.null(eff)) stop("'object' has no 'effects' component") 860 if(set.sign) { 861 dd <- coef(object) 862 if(is.matrix(eff)) { 863 r <- 1L:dim(dd)[1L] 864 eff[r, ] <- sign(dd) * abs(eff[r, ]) 865 } else { 866 r <- seq_along(dd) 867 eff[r] <- sign(dd) * abs(eff[r]) 868 } 869 } 870 structure(eff, assign = object$assign, class = "coef") 871} 872 873## plot.lm --> now in ./plot.lm.R 874 875model.matrix.lm <- function(object, ...) 876{ 877 if(n_match <- match("x", names(object), 0L)) object[[n_match]] 878 else { 879 data <- model.frame(object, xlev = object$xlevels, ...) 880 if(exists(".GenericCallEnv", inherits = FALSE)) # in dispatch 881 NextMethod("model.matrix", data = data, 882 contrasts.arg = object$contrasts) 883 else { 884 ## model.matrix.lm() is exported for historic reasons. If 885 ## called directly, calling NextMethod() will not work "as 886 ## expected", so call the "next" method directly. 887 dots <- list(...) 888 dots$data <- dots$contrasts.arg <- NULL 889 do.call("model.matrix.default", 890 c(list(object = object, data = data, 891 contrasts.arg = object$contrasts), 892 dots)) 893 } 894 } 895} 896 897##---> SEE ./mlm.R for more methods, etc. !! 898predict.mlm <- 899 function(object, newdata, se.fit = FALSE, na.action = na.pass, ...) 900{ 901 if(missing(newdata)) return(object$fitted.values) 902 if(se.fit) 903 stop("the 'se.fit' argument is not yet implemented for \"mlm\" objects") 904 if(missing(newdata)) { 905 X <- model.matrix(object) 906 offset <- object$offset 907 } 908 else { 909 tt <- terms(object) 910 Terms <- delete.response(tt) 911 m <- model.frame(Terms, newdata, na.action = na.action, 912 xlev = object$xlevels) 913 if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) 914 X <- model.matrix(Terms, m, contrasts.arg = object$contrasts) 915 offset <- if (!is.null(off.num <- attr(tt, "offset"))) 916 eval(attr(tt, "variables")[[off.num+1]], newdata) 917 else if (!is.null(object$offset)) 918 eval(object$call$offset, newdata) 919 } 920 piv <- qr.lm(object)$pivot[seq(object$rank)] 921 pred <- X[, piv, drop = FALSE] %*% object$coefficients[piv,] 922 if ( !is.null(offset) ) pred <- pred + offset 923 if(inherits(object, "mlm")) pred else pred[, 1L] 924} 925 926## from base/R/labels.R 927labels.lm <- function(object, ...) 928{ 929 tl <- attr(object$terms, "term.labels") 930 asgn <- object$assign[qr.lm(object)$pivot[1L:object$rank]] 931 tl[unique(asgn)] 932} 933