1# File src/library/stats/R/glm.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2020 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19utils::globalVariables("n", add = TRUE) 20 21### This function fits a generalized linear model via 22### iteratively reweighted least squares for any family. 23### Written by Simon Davies, Dec 1995 24### glm.fit modified by Thomas Lumley, Apr 1997, and then others.. 25 26glm <- function(formula, family = gaussian, data, weights, 27 subset, na.action, start = NULL, 28 etastart, mustart, offset, 29 control = list(...), 30 model = TRUE, method = "glm.fit", 31 x = FALSE, y = TRUE, 32 singular.ok = TRUE, contrasts = NULL, ...) 33{ 34 cal <- match.call() 35 ## family 36 if(is.character(family)) 37 family <- get(family, mode = "function", envir = parent.frame()) 38 if(is.function(family)) family <- family() 39 if(is.null(family$family)) { 40 print(family) 41 stop("'family' not recognized") 42 } 43 44 ## extract x, y, etc from the model formula and frame 45 if(missing(data)) data <- environment(formula) 46 mf <- match.call(expand.dots = FALSE) 47 m <- match(c("formula", "data", "subset", "weights", "na.action", 48 "etastart", "mustart", "offset"), names(mf), 0L) 49 mf <- mf[c(1L, m)] 50 mf$drop.unused.levels <- TRUE 51 ## need stats:: for non-standard evaluation 52 mf[[1L]] <- quote(stats::model.frame) 53 mf <- eval(mf, parent.frame()) 54 if(identical(method, "model.frame")) return(mf) 55 56 if (!is.character(method) && !is.function(method)) 57 stop("invalid 'method' argument") 58 ## for back-compatibility in return result 59 if (identical(method, "glm.fit")) 60 control <- do.call("glm.control", control) 61 62 mt <- attr(mf, "terms") # allow model.frame to have updated it 63 64 Y <- model.response(mf, "any") # e.g. factors are allowed 65 ## avoid problems with 1D arrays, but keep names 66 if(length(dim(Y)) == 1L) { 67 nm <- rownames(Y) 68 dim(Y) <- NULL 69 if(!is.null(nm)) names(Y) <- nm 70 } 71 ## null model support 72 X <- if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts) else matrix(,NROW(Y), 0L) 73 ## avoid any problems with 1D or nx1 arrays by as.vector. 74 weights <- as.vector(model.weights(mf)) 75 if(!is.null(weights) && !is.numeric(weights)) 76 stop("'weights' must be a numeric vector") 77 ## check weights and offset 78 if( !is.null(weights) && any(weights < 0) ) 79 stop("negative weights not allowed") 80 81 offset <- as.vector(model.offset(mf)) 82 if(!is.null(offset)) { 83 if(length(offset) != NROW(Y)) 84 stop(gettextf("number of offsets is %d should equal %d (number of observations)", 85 length(offset), NROW(Y)), domain = NA) 86 } 87 ## these allow starting values to be expressed in terms of other vars. 88 mustart <- model.extract(mf, "mustart") 89 etastart <- model.extract(mf, "etastart") 90 91 ## We want to set the name on this call and the one below for the 92 ## sake of messages from the fitter function 93 fit <- eval(call(if(is.function(method)) "method" else method, 94 x = X, y = Y, weights = weights, start = start, 95 etastart = etastart, mustart = mustart, 96 offset = offset, family = family, control = control, 97 intercept = attr(mt, "intercept") > 0L, singular.ok = singular.ok)) 98 99 ## This calculated the null deviance from the intercept-only model 100 ## if there is one, otherwise from the offset-only model. 101 ## We need to recalculate by a proper fit if there is intercept and 102 ## offset. 103 ## 104 ## The glm.fit calculation could be wrong if the link depends on the 105 ## observations, so we allow the null deviance to be forced to be 106 ## re-calculated by setting an offset (provided there is an intercept). 107 ## Prior to 2.4.0 this was only done for non-zero offsets. 108 if(length(offset) && attr(mt, "intercept") > 0L) { 109 fit2 <- 110 eval(call(if(is.function(method)) "method" else method, 111 x = X[, "(Intercept)", drop=FALSE], y = Y, 112 ## starting values potentially required (PR#16877): 113 mustart = fit$fitted.values, 114 weights = weights, offset = offset, family = family, 115 control = control, intercept = TRUE)) 116 ## That fit might not have converged .... 117 if(!fit2$converged) 118 warning("fitting to calculate the null deviance did not converge -- increase 'maxit'?") 119 fit$null.deviance <- fit2$deviance 120 } 121 if(model) fit$model <- mf 122 fit$na.action <- attr(mf, "na.action") 123 if(x) fit$x <- X 124 if(!y) fit$y <- NULL 125 structure(c(fit, 126 list(call = cal, formula = formula, 127 terms = mt, data = data, 128 offset = offset, control = control, method = method, 129 contrasts = attr(X, "contrasts"), 130 xlevels = .getXlevels(mt, mf))), 131 class = c(fit$class, c("glm", "lm"))) 132} 133 134 135glm.control <- function(epsilon = 1e-8, maxit = 25, trace = FALSE) 136{ 137 if(!is.numeric(epsilon) || epsilon <= 0) 138 stop("value of 'epsilon' must be > 0") 139 if(!is.numeric(maxit) || maxit <= 0) 140 stop("maximum number of iterations must be > 0") 141 list(epsilon = epsilon, maxit = maxit, trace = trace) 142} 143 144## Modified by Thomas Lumley 26 Apr 97 145## Added boundary checks and step halving 146## Modified detection of fitted 0/1 in binomial 147## Updated by KH as suggested by BDR on 1998/06/16 148 149glm.fit <- 150 function (x, y, weights = rep.int(1, nobs), start = NULL, 151 etastart = NULL, mustart = NULL, offset = rep.int(0, nobs), 152 family = gaussian(), control = list(), intercept = TRUE, 153 singular.ok = TRUE) 154{ 155 control <- do.call("glm.control", control) 156 x <- as.matrix(x) 157 xnames <- dimnames(x)[[2L]] 158 ynames <- if(is.matrix(y)) rownames(y) else names(y) 159 conv <- FALSE 160 nobs <- NROW(y) 161 nvars <- ncol(x) 162 EMPTY <- nvars == 0 163 ## define weights and offset if needed 164 if (is.null(weights)) 165 weights <- rep.int(1, nobs) 166 if (is.null(offset)) 167 offset <- rep.int(0, nobs) 168 169 ## get family functions: 170 variance <- family$variance 171 linkinv <- family$linkinv 172 if (!is.function(variance) || !is.function(linkinv) ) 173 stop("'family' argument seems not to be a valid family object", call. = FALSE) 174 dev.resids <- family$dev.resids 175 aic <- family$aic 176 mu.eta <- family$mu.eta 177 valideta <- family$valideta %||% function(eta) TRUE 178 validmu <- family$validmu %||% function(mu) TRUE 179 if(is.null(mustart)) { 180 ## calculates mustart and may change y and weights and set n (!) 181 eval(family$initialize) 182 } else { 183 mukeep <- mustart 184 eval(family$initialize) 185 mustart <- mukeep 186 } 187 if(EMPTY) { 188 eta <- rep.int(0, nobs) + offset 189 if (!valideta(eta)) 190 stop("invalid linear predictor values in empty model", call. = FALSE) 191 mu <- linkinv(eta) 192 ## calculate initial deviance and coefficient 193 if (!validmu(mu)) 194 stop("invalid fitted means in empty model", call. = FALSE) 195 dev <- sum(dev.resids(y, mu, weights)) 196 w <- sqrt((weights * mu.eta(eta)^2)/variance(mu)) 197 residuals <- (y - mu)/mu.eta(eta) 198 good <- rep_len(TRUE, length(residuals)) 199 boundary <- conv <- TRUE 200 coef <- numeric() 201 iter <- 0L 202 } else { 203 coefold <- NULL 204 eta <- etastart %||% { 205 if(!is.null(start)) 206 if (length(start) != nvars) 207 stop(gettextf( 208 "length of 'start' should equal %d and correspond to initial coefs for %s", 209 nvars, paste(deparse(xnames), collapse=", ")), 210 domain = NA) 211 else { 212 coefold <- start 213 offset + as.vector(if (NCOL(x) == 1L) x * start else x %*% start) 214 } 215 else family$linkfun(mustart) 216 } 217 mu <- linkinv(eta) 218 if (!(validmu(mu) && valideta(eta))) 219 stop("cannot find valid starting values: please specify some", call. = FALSE) 220 ## calculate initial deviance and coefficient 221 devold <- sum(dev.resids(y, mu, weights)) 222 boundary <- conv <- FALSE 223 224 ##------------- THE Iteratively Reweighting L.S. iteration ----------- 225 for (iter in 1L:control$maxit) { 226 good <- weights > 0 227 varmu <- variance(mu)[good] 228 if (anyNA(varmu)) 229 stop("NAs in V(mu)") 230 if (any(varmu == 0)) 231 stop("0s in V(mu)") 232 mu.eta.val <- mu.eta(eta) 233 if (any(is.na(mu.eta.val[good]))) 234 stop("NAs in d(mu)/d(eta)") 235 ## drop observations for which w will be zero 236 good <- (weights > 0) & (mu.eta.val != 0) 237 238 if (all(!good)) { 239 conv <- FALSE 240 warning(gettextf("no observations informative at iteration %d", 241 iter), domain = NA) 242 break 243 } 244 z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good] 245 w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good]) 246 ## call Fortran code via C wrapper 247 fit <- .Call(C_Cdqrls, x[good, , drop = FALSE] * w, z * w, 248 min(1e-7, control$epsilon/1000), check=FALSE) 249 if (any(!is.finite(fit$coefficients))) { 250 conv <- FALSE 251 warning(gettextf("non-finite coefficients at iteration %d", iter), domain = NA) 252 break 253 } 254 ## stop if not enough parameters 255 if (nobs < fit$rank) 256 stop(sprintf(ngettext(nobs, 257 "X matrix has rank %d, but only %d observation", 258 "X matrix has rank %d, but only %d observations"), 259 fit$rank, nobs), domain = NA) 260 if(!singular.ok && fit$rank < nvars) stop("singular fit encountered") 261 ## calculate updated values of eta and mu with the new coef: 262 start[fit$pivot] <- fit$coefficients 263 eta <- drop(x %*% start) 264 mu <- linkinv(eta <- eta + offset) 265 dev <- sum(dev.resids(y, mu, weights)) 266 if (control$trace) 267 cat("Deviance = ", dev, " Iterations - ", iter, "\n", sep = "") 268 ## check for divergence 269 boundary <- FALSE 270 if (!is.finite(dev)) { 271 if(is.null(coefold)) 272 stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE) 273 warning("step size truncated due to divergence", call. = FALSE) 274 ii <- 1 275 while (!is.finite(dev)) { 276 if (ii > control$maxit) 277 stop("inner loop 1; cannot correct step size", call. = FALSE) 278 ii <- ii + 1 279 start <- (start + coefold)/2 280 eta <- drop(x %*% start) 281 mu <- linkinv(eta <- eta + offset) 282 dev <- sum(dev.resids(y, mu, weights)) 283 } 284 boundary <- TRUE 285 if (control$trace) 286 cat("Step halved: new deviance = ", dev, "\n", sep = "") 287 } 288 ## check for fitted values outside domain. 289 if (!(valideta(eta) && validmu(mu))) { 290 if(is.null(coefold)) 291 stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE) 292 warning("step size truncated: out of bounds", call. = FALSE) 293 ii <- 1 294 while (!(valideta(eta) && validmu(mu))) { 295 if (ii > control$maxit) 296 stop("inner loop 2; cannot correct step size", call. = FALSE) 297 ii <- ii + 1 298 start <- (start + coefold)/2 299 eta <- drop(x %*% start) 300 mu <- linkinv(eta <- eta + offset) 301 } 302 boundary <- TRUE 303 dev <- sum(dev.resids(y, mu, weights)) 304 if (control$trace) 305 cat("Step halved: new deviance = ", dev, "\n", sep = "") 306 } 307 ## check for convergence 308 if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) { 309 conv <- TRUE 310 coef <- start 311 break 312 } else { 313 devold <- dev 314 coef <- coefold <- start 315 } 316 } ##-------------- end IRLS iteration ------------------------------- 317 318 if (!conv) 319 warning("glm.fit: algorithm did not converge", call. = FALSE) 320 if (boundary) 321 warning("glm.fit: algorithm stopped at boundary value", call. = FALSE) 322 eps <- 10*.Machine$double.eps 323 if (family$family == "binomial") { 324 if (any(mu > 1 - eps) || any(mu < eps)) 325 warning("glm.fit: fitted probabilities numerically 0 or 1 occurred", call. = FALSE) 326 } 327 if (family$family == "poisson") { 328 if (any(mu < eps)) 329 warning("glm.fit: fitted rates numerically 0 occurred", call. = FALSE) 330 } 331 ## If X matrix was not full rank then columns were pivoted, 332 ## hence we need to re-label the names ... 333 ## Original code changed as suggested by BDR---give NA rather 334 ## than 0 for non-estimable parameters 335 if (fit$rank < nvars) coef[fit$pivot][seq.int(fit$rank+1, nvars)] <- NA 336 xxnames <- xnames[fit$pivot] 337 ## update by accurate calculation, including 0-weight cases. 338 residuals <- (y - mu)/mu.eta(eta) 339## residuals <- rep.int(NA, nobs) 340## residuals[good] <- z - (eta - offset)[good] # z does not have offset in. 341 fit$qr <- as.matrix(fit$qr) 342 nr <- min(sum(good), nvars) 343 if (nr < nvars) { 344 Rmat <- diag(nvars) 345 Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars] 346 } 347 else Rmat <- fit$qr[1L:nvars, 1L:nvars] 348 Rmat <- as.matrix(Rmat) 349 Rmat[row(Rmat) > col(Rmat)] <- 0 350 names(coef) <- xnames 351 colnames(fit$qr) <- xxnames 352 dimnames(Rmat) <- list(xxnames, xxnames) 353 } 354 names(residuals) <- ynames 355 names(mu) <- ynames 356 names(eta) <- ynames 357 # for compatibility with lm, which has a full-length weights vector 358 wt <- rep.int(0, nobs) 359 wt[good] <- w^2 360 names(wt) <- ynames 361 names(weights) <- ynames 362 names(y) <- ynames 363 if(!EMPTY) 364 names(fit$effects) <- 365 c(xxnames[seq_len(fit$rank)], rep.int("", sum(good) - fit$rank)) 366 ## calculate null deviance -- corrected in glm() if offset and intercept 367 wtdmu <- 368 if (intercept) sum(weights * y)/sum(weights) else linkinv(offset) 369 nulldev <- sum(dev.resids(y, wtdmu, weights)) 370 ## calculate df 371 n.ok <- nobs - sum(weights==0) 372 nulldf <- n.ok - as.integer(intercept) 373 rank <- if(EMPTY) 0 else fit$rank 374 resdf <- n.ok - rank 375 ## calculate AIC 376 aic.model <- 377 aic(y, n, mu, weights, dev) + 2*rank 378 ## ^^ is only initialize()d for "binomial" [yuck!] 379 list(coefficients = coef, residuals = residuals, fitted.values = mu, 380 effects = if(!EMPTY) fit$effects, R = if(!EMPTY) Rmat, rank = rank, 381 qr = if(!EMPTY) structure(fit[c("qr", "rank", "qraux", "pivot", "tol")], class = "qr"), 382 family = family, 383 linear.predictors = eta, deviance = dev, aic = aic.model, 384 null.deviance = nulldev, iter = iter, weights = wt, 385 prior.weights = weights, df.residual = resdf, df.null = nulldf, 386 y = y, converged = conv, boundary = boundary) 387} 388 389 390print.glm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) 391{ 392 cat("\nCall: ", 393 paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") 394 if(length(coef(x))) { 395 cat("Coefficients") 396 if(is.character(co <- x$contrasts)) 397 cat(" [contrasts: ", 398 apply(cbind(names(co),co), 1L, paste, collapse = "="), "]") 399 cat(":\n") 400 print.default(format(x$coefficients, digits = digits), 401 print.gap = 2, quote = FALSE) 402 } else cat("No coefficients\n\n") 403 cat("\nDegrees of Freedom:", x$df.null, "Total (i.e. Null); ", 404 x$df.residual, "Residual\n") 405 if(nzchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep = "") 406 cat("Null Deviance: ", format(signif(x$null.deviance, digits)), 407 "\nResidual Deviance:", format(signif(x$deviance, digits)), 408 "\tAIC:", format(signif(x$aic, digits))) 409 cat("\n") 410 invisible(x) 411} 412 413 414anova.glm <- function(object, ..., dispersion = NULL, test = NULL) 415{ 416 ## check for multiple objects 417 dotargs <- list(...) 418 named <- if (is.null(names(dotargs))) 419 rep_len(FALSE, length(dotargs)) else (names(dotargs) != "") 420 if(any(named)) 421 warning("the following arguments to 'anova.glm' are invalid and dropped: ", 422 paste(deparse(dotargs[named]), collapse=", ")) 423 dotargs <- dotargs[!named] 424 is.glm <- vapply(dotargs,function(x) inherits(x,"glm"), NA) 425 dotargs <- dotargs[is.glm] 426 427 ## do not copy this: anova.glmlist is not an exported object. 428 ## use anova(structure(list(object, dotargs), class = "glmlist")) 429 if (length(dotargs)) 430 return(anova.glmlist(c(list(object), dotargs), 431 dispersion = dispersion, test = test)) 432 433 ## score tests require a bit of extra computing 434 doscore <- !is.null(test) && test=="Rao" 435 ## extract variables from model 436 437 varlist <- attr(object$terms, "variables") 438 ## must avoid partial matching here. 439 x <- 440 if (n <- match("x", names(object), 0L)) 441 object[[n]] 442 else model.matrix(object) 443 varseq <- attr(x, "assign") 444 nvars <- max(0, varseq) 445 resdev <- resdf <- NULL 446 447 if (doscore){ 448 score <- numeric(nvars) 449 # fit a null model 450 method <- object$method 451 y <- object$y 452 fit <- eval(call(if(is.function(method)) "method" else method, 453 x=x[, varseq == 0, drop = FALSE], 454 y=y, 455 weights=object$prior.weights, 456 start =object$start, 457 offset =object$offset, 458 family =object$family, 459 control=object$control)) 460 r <- fit$residuals 461 w <- fit$weights 462 icpt <- attr(object$terms, "intercept") 463 } 464 465 ## if there is more than one explanatory variable then 466 ## recall glm.fit to fit variables sequentially 467 468 ## for score tests, we need to do so in any case 469 if(nvars > 1 || doscore) { 470 method <- object$method 471 ## allow for 'y = FALSE' in the call (PR#13098) 472 y <- object$y 473 if(is.null(y)) { ## code from residuals.glm 474 mu.eta <- object$family$mu.eta 475 eta <- object$linear.predictors 476 y <- object$fitted.values + object$residuals * mu.eta(eta) 477 } 478 for(i in seq_len(max(nvars - 1L, 0))) { # nvars == 0 can happen 479 ## explanatory variables up to i are kept in the model 480 ## use method from glm to find residual deviance 481 ## and df for each sequential fit 482 fit <- eval(call(if(is.function(method)) "method" else method, 483 x=x[, varseq <= i, drop = FALSE], 484 y=y, 485 weights=object$prior.weights, 486 start =object$start, 487 offset =object$offset, 488 family =object$family, 489 control=object$control)) 490 if (doscore) { 491 zz <- eval(call(if(is.function(method)) "method" else method, 492 x=x[, varseq <= i, drop = FALSE], 493 y=r, 494 weights=w, 495 intercept=icpt)) 496 score[i] <- zz$null.deviance - zz$deviance 497 r <- fit$residuals 498 w <- fit$weights 499 } 500 resdev <- c(resdev, fit$deviance) 501 resdf <- c(resdf, fit$df.residual) 502 } 503 if (doscore) { 504 zz <- eval(call(if(is.function(method)) "method" else method, 505 x=x, 506 y=r, 507 weights=w, 508 intercept=icpt)) 509 score[nvars] <- zz$null.deviance - zz$deviance 510 } 511 } 512 513 ## add values from null and full model 514 515 resdf <- c(object$df.null, resdf, object$df.residual) 516 resdev <- c(object$null.deviance, resdev, object$deviance) 517 518 ## construct table and title 519 520 table <- data.frame(c(NA, -diff(resdf)), 521 c(NA, pmax(0, -diff(resdev))), resdf, resdev) 522 tl <- attr(object$terms, "term.labels") 523 if (length(tl) == 0L) table <- table[1,,drop=FALSE] # kludge for null model 524 dimnames(table) <- list(c("NULL", tl), 525 c("Df", "Deviance", "Resid. Df", "Resid. Dev")) 526 if (doscore) 527 table <- cbind(table, Rao=c(NA,score)) 528 title <- paste0("Analysis of Deviance Table", "\n\nModel: ", 529 object$family$family, ", link: ", object$family$link, 530 "\n\nResponse: ", as.character(varlist[-1L])[1L], 531 "\n\nTerms added sequentially (first to last)\n\n") 532 533 ## calculate test statistics if needed 534 535 df.dispersion <- Inf 536 if(is.null(dispersion)) { 537 dispersion <- summary(object, dispersion=dispersion)$dispersion 538 df.dispersion <- if (dispersion == 1) Inf else object$df.residual 539 } 540 if(!is.null(test)) { 541 if(test == "F" && df.dispersion == Inf) { 542 fam <- object$family$family 543 if(fam == "binomial" || fam == "poisson") 544 warning(gettextf("using F test with a '%s' family is inappropriate", 545 fam), 546 domain = NA) 547 else 548 warning("using F test with a fixed dispersion is inappropriate") 549 } 550 table <- stat.anova(table=table, test=test, scale=dispersion, 551 df.scale=df.dispersion, n=NROW(x)) 552 } 553 structure(table, heading = title, class = c("anova", "data.frame")) 554} 555 556 557anova.glmlist <- function(object, ..., dispersion=NULL, test=NULL) 558{ 559 560 doscore <- !is.null(test) && test=="Rao" 561 562 ## find responses for all models and remove 563 ## any models with a different response 564 565 responses <- as.character(lapply(object, function(x) { 566 deparse(formula(x)[[2L]])} )) 567 sameresp <- responses==responses[1L] 568 if(!all(sameresp)) { 569 object <- object[sameresp] 570 warning(gettextf("models with response %s removed because response differs from model 1", 571 sQuote(deparse(responses[!sameresp]))), 572 domain = NA) 573 } 574 575 ns <- sapply(object, function(x) length(x$residuals)) 576 if(any(ns != ns[1L])) 577 stop("models were not all fitted to the same size of dataset") 578 579 ## calculate the number of models 580 581 nmodels <- length(object) 582 if(nmodels==1) 583 return(anova.glm(object[[1L]], dispersion=dispersion, test=test)) 584 585 ## extract statistics 586 587 resdf <- as.numeric(lapply(object, function(x) x$df.residual)) 588 resdev <- as.numeric(lapply(object, function(x) x$deviance)) 589 590 if (doscore){ 591 score <- numeric(nmodels) 592 score[1] <- NA 593 df <- -diff(resdf) 594 595 for (i in seq_len(nmodels-1)) { 596 m1 <- if (df[i] > 0) object[[i]] else object[[i+1]] 597 m2 <- if (df[i] > 0) object[[i+1]] else object[[i]] 598 r <- m1$residuals 599 w <- m1$weights 600 method <- m2$method 601 icpt <- attr(m1$terms, "intercept") 602 zz <- eval(call(if(is.function(method)) "method" else method, 603 x=model.matrix(m2), 604 y=r, 605 weights=w, 606 intercept=icpt)) 607 score[i+1] <- zz$null.deviance - zz$deviance 608 if (df[i] < 0) score[i+1] <- - score[i+1] 609 } 610 } 611 612 ## construct table and title 613 614 table <- data.frame(resdf, resdev, c(NA, -diff(resdf)), 615 c(NA, -diff(resdev)) ) 616 variables <- lapply(object, function(x) 617 paste(deparse(formula(x)), collapse="\n") ) 618 dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Resid. Dev", "Df", 619 "Deviance")) 620 if (doscore) 621 table <- cbind(table, Rao=score) 622 623 title <- "Analysis of Deviance Table\n" 624 topnote <- paste0("Model ", format(1L:nmodels), ": ", variables, 625 collapse = "\n") 626 627 ## calculate test statistic if needed 628 629 if(!is.null(test)) { 630 bigmodel <- object[[order(resdf)[1L]]] 631 dispersion <- summary(bigmodel, dispersion=dispersion)$dispersion 632 df.dispersion <- if (dispersion == 1) Inf else min(resdf) 633 if(test == "F" && df.dispersion == Inf) { 634 fam <- bigmodel$family$family 635 if(fam == "binomial" || fam == "poisson") 636 warning(gettextf("using F test with a '%s' family is inappropriate", 637 fam), 638 domain = NA, call. = FALSE) 639 else 640 warning("using F test with a fixed dispersion is inappropriate") 641 } 642 table <- stat.anova(table = table, test = test, 643 scale = dispersion, df.scale = df.dispersion, 644 n = length(bigmodel$residuals)) 645 } 646 structure(table, heading = c(title, topnote), 647 class = c("anova", "data.frame")) 648} 649 650 651summary.glm <- function(object, dispersion = NULL, 652 correlation = FALSE, symbolic.cor = FALSE, ...) 653{ 654 est.disp <- FALSE 655 df.r <- object$df.residual 656 if(is.null(dispersion)) # calculate dispersion if needed 657 dispersion <- 658 if(object$family$family %in% c("poisson", "binomial")) 1 659 else if(df.r > 0) { 660 est.disp <- TRUE 661 if(any(object$weights==0)) 662 warning("observations with zero weight not used for calculating dispersion") 663 sum((object$weights*object$residuals^2)[object$weights > 0])/ df.r 664 } else { 665 est.disp <- TRUE 666 NaN 667 } 668 669 ## calculate scaled and unscaled covariance matrix 670 671 aliased <- is.na(coef(object)) # used in print method 672 p <- object$rank 673 if (p > 0) { 674 p1 <- 1L:p 675 Qr <- qr.lm(object) 676 ## WATCHIT! doesn't this rely on pivoting not permuting 1L:p? -- that's quaranteed 677 coef.p <- object$coefficients[Qr$pivot[p1]] 678 covmat.unscaled <- chol2inv(Qr$qr[p1,p1,drop=FALSE]) 679 dimnames(covmat.unscaled) <- list(names(coef.p),names(coef.p)) 680 covmat <- dispersion*covmat.unscaled 681 var.cf <- diag(covmat) 682 683 ## calculate coef table 684 685 s.err <- sqrt(var.cf) 686 tvalue <- coef.p/s.err 687 688 dn <- c("Estimate", "Std. Error") 689 if(!est.disp) { # known dispersion 690 pvalue <- 2*pnorm(-abs(tvalue)) 691 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) 692 dimnames(coef.table) <- list(names(coef.p), 693 c(dn, "z value","Pr(>|z|)")) 694 } else if(df.r > 0) { 695 pvalue <- 2*pt(-abs(tvalue), df.r) 696 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) 697 dimnames(coef.table) <- list(names(coef.p), 698 c(dn, "t value","Pr(>|t|)")) 699 } else { # df.r == 0 700 coef.table <- cbind(coef.p, NaN, NaN, NaN) 701 dimnames(coef.table) <- list(names(coef.p), 702 c(dn, "t value","Pr(>|t|)")) 703 } 704 df.f <- NCOL(Qr$qr) 705 } else { 706 coef.table <- matrix(, 0L, 4L) 707 dimnames(coef.table) <- 708 list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 709 covmat.unscaled <- covmat <- matrix(, 0L, 0L) 710 df.f <- length(aliased) 711 } 712 ## return answer 713 714 ## these need not all exist, e.g. na.action. 715 keep <- match(c("call","terms","family","deviance", "aic", 716 "contrasts", "df.residual","null.deviance","df.null", 717 "iter", "na.action"), names(object), 0L) 718 ans <- c(object[keep], 719 list(deviance.resid = residuals(object, type = "deviance"), 720 coefficients = coef.table, 721 aliased = aliased, 722 dispersion = dispersion, 723 df = c(object$rank, df.r, df.f), 724 cov.unscaled = covmat.unscaled, 725 cov.scaled = covmat)) 726 727 if(correlation && p > 0) { 728 dd <- sqrt(diag(covmat.unscaled)) 729 ans$correlation <- 730 covmat.unscaled/outer(dd,dd) 731 ans$symbolic.cor <- symbolic.cor 732 } 733 class(ans) <- "summary.glm" 734 return(ans) 735} 736 737print.summary.glm <- 738 function (x, digits = max(3L, getOption("digits") - 3L), 739 symbolic.cor = x$symbolic.cor, 740 signif.stars = getOption("show.signif.stars"), ...) 741{ 742 cat("\nCall:\n", 743 paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") 744 cat("Deviance Residuals: \n") 745 if(x$df.residual > 5) { 746 x$deviance.resid <- setNames(quantile(x$deviance.resid, na.rm = TRUE), 747 c("Min", "1Q", "Median", "3Q", "Max")) 748 } 749 xx <- zapsmall(x$deviance.resid, digits + 1L) 750 print.default(xx, digits = digits, na.print = "", print.gap = 2L) 751 752 if(length(x$aliased) == 0L) { 753 cat("\nNo Coefficients\n") 754 } else { 755 ## df component added in 1.8.0 756 ## partial matching problem here. 757 df <- if ("df" %in% names(x)) x[["df"]] else NULL 758 if (!is.null(df) && (nsingular <- df[3L] - df[1L])) 759 cat("\nCoefficients: (", nsingular, 760 " not defined because of singularities)\n", sep = "") 761 else cat("\nCoefficients:\n") 762 coefs <- x$coefficients 763 if(!is.null(aliased <- x$aliased) && any(aliased)) { 764 cn <- names(aliased) 765 coefs <- matrix(NA, length(aliased), 4L, 766 dimnames=list(cn, colnames(coefs))) 767 coefs[!aliased, ] <- x$coefficients 768 } 769 printCoefmat(coefs, digits = digits, signif.stars = signif.stars, 770 na.print = "NA", ...) 771 } 772 ## 773 cat("\n(Dispersion parameter for ", x$family$family, 774 " family taken to be ", format(x$dispersion), ")\n\n", 775 apply(cbind(paste(format(c("Null","Residual"), justify="right"), 776 "deviance:"), 777 format(unlist(x[c("null.deviance","deviance")]), 778 digits = max(5L, digits + 1L)), " on", 779 format(unlist(x[c("df.null","df.residual")])), 780 " degrees of freedom\n"), 781 1L, paste, collapse = " "), sep = "") 782 if(nzchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep = "") 783 cat("AIC: ", format(x$aic, digits = max(4L, digits + 1L)),"\n\n", 784 "Number of Fisher Scoring iterations: ", x$iter, 785 "\n", sep = "") 786 787 correl <- x$correlation 788 if(!is.null(correl)) { 789# looks most sensible not to give NAs for undefined coefficients 790# if(!is.null(aliased) && any(aliased)) { 791# nc <- length(aliased) 792# correl <- matrix(NA, nc, nc, dimnames = list(cn, cn)) 793# correl[!aliased, !aliased] <- x$correl 794# } 795 p <- NCOL(correl) 796 if(p > 1) { 797 cat("\nCorrelation of Coefficients:\n") 798 if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects 799 print(symnum(correl, abbr.colnames = NULL)) 800 } else { 801 correl <- format(round(correl, 2L), nsmall = 2L, 802 digits = digits) 803 correl[!lower.tri(correl)] <- "" 804 print(correl[-1, -p, drop=FALSE], quote = FALSE) 805 } 806 } 807 } 808 cat("\n") 809 invisible(x) 810} 811 812 813## GLM Methods for Generic Functions : 814 815## needed to avoid deviance.lm 816deviance.glm <- function(object, ...) object$deviance 817effects.glm <- function(object, ...) object$effects 818family.glm <- function(object, ...) object$family 819 820residuals.glm <- 821 function(object, 822 type = c("deviance", "pearson", "working", "response", "partial"), 823 ...) 824{ 825 type <- match.arg(type) 826 y <- object$y 827 r <- object$residuals 828 mu <- object$fitted.values 829 wts <- object$prior.weights 830 switch(type, 831 deviance=,pearson=,response= 832 if(is.null(y)) { 833 mu.eta <- object$family$mu.eta 834 eta <- object$linear.predictors 835 y <- mu + r * mu.eta(eta) 836 }) 837 res <- switch(type, 838 deviance = if(object$df.residual > 0) { 839 d.res <- sqrt(pmax((object$family$dev.resids)(y, mu, wts), 0)) 840 ifelse(y > mu, d.res, -d.res) 841 } else rep.int(0, length(mu)), 842 pearson = (y-mu)*sqrt(wts)/sqrt(object$family$variance(mu)), 843 working = r, 844 response = y - mu, 845 partial = r 846 ) 847 if(!is.null(object$na.action)) 848 res <- naresid(object$na.action, res) 849 if (type == "partial") ## need to avoid doing naresid() twice. 850 res <- res+predict(object, type="terms") 851 res 852} 853 854## For influence.glm() ... --> ./lm.influence.R 855 856## KH on 1998/06/22: update.default() is now used ... 857 858model.frame.glm <- function (formula, ...) 859{ 860 dots <- list(...) 861 nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0L)] 862 if (length(nargs) || is.null(formula$model)) { 863 fcall <- formula$call 864 fcall$method <- "model.frame" 865 ## need stats:: for non-standard evaluation 866 fcall[[1L]] <- quote(stats::glm) 867 fcall[names(nargs)] <- nargs 868 env <- environment(formula$terms) %||% parent.frame() 869 eval(fcall, env) 870 } 871 else formula$model 872} 873 874weights.glm <- function(object, type = c("prior", "working"), ...) 875{ 876 type <- match.arg(type) 877 res <- if(type == "prior") object$prior.weights else object$weights 878 if(is.null(object$na.action)) res 879 else naresid(object$na.action, res) 880} 881 882formula.glm <- function(x, ...) 883{ 884 form <- x$formula 885 if( !is.null(form) ) { 886 form <- formula(x$terms) # has . expanded 887 environment(form) <- environment(x$formula) 888 form 889 } else formula(x$terms) 890} 891