1#  File src/library/stats/R/glm.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2020 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19utils::globalVariables("n", add = TRUE)
20
21### This function fits a generalized linear model via
22### iteratively reweighted least squares for any family.
23### Written by Simon Davies, Dec 1995
24### glm.fit modified by Thomas Lumley, Apr 1997, and then others..
25
26glm <- function(formula, family = gaussian, data, weights,
27		subset, na.action, start = NULL,
28		etastart, mustart, offset,
29		control = list(...),
30                model = TRUE, method = "glm.fit",
31                x = FALSE, y = TRUE,
32		singular.ok = TRUE, contrasts = NULL, ...)
33{
34    cal <- match.call()
35    ## family
36    if(is.character(family))
37        family <- get(family, mode = "function", envir = parent.frame())
38    if(is.function(family)) family <- family()
39    if(is.null(family$family)) {
40	print(family)
41	stop("'family' not recognized")
42    }
43
44    ## extract x, y, etc from the model formula and frame
45    if(missing(data)) data <- environment(formula)
46    mf <- match.call(expand.dots = FALSE)
47    m <- match(c("formula", "data", "subset", "weights", "na.action",
48                 "etastart", "mustart", "offset"), names(mf), 0L)
49    mf <- mf[c(1L, m)]
50    mf$drop.unused.levels <- TRUE
51    ## need stats:: for non-standard evaluation
52    mf[[1L]] <- quote(stats::model.frame)
53    mf <- eval(mf, parent.frame())
54    if(identical(method, "model.frame")) return(mf)
55
56    if (!is.character(method) && !is.function(method))
57        stop("invalid 'method' argument")
58    ## for back-compatibility in return result
59    if (identical(method, "glm.fit"))
60        control <- do.call("glm.control", control)
61
62    mt <- attr(mf, "terms") # allow model.frame to have updated it
63
64    Y <- model.response(mf, "any") # e.g. factors are allowed
65    ## avoid problems with 1D arrays, but keep names
66    if(length(dim(Y)) == 1L) {
67        nm <- rownames(Y)
68        dim(Y) <- NULL
69        if(!is.null(nm)) names(Y) <- nm
70    }
71    ## null model support
72    X <- if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts) else matrix(,NROW(Y), 0L)
73    ## avoid any problems with 1D or nx1 arrays by as.vector.
74    weights <- as.vector(model.weights(mf))
75    if(!is.null(weights) && !is.numeric(weights))
76        stop("'weights' must be a numeric vector")
77    ## check weights and offset
78    if( !is.null(weights) && any(weights < 0) )
79	stop("negative weights not allowed")
80
81    offset <- as.vector(model.offset(mf))
82    if(!is.null(offset)) {
83        if(length(offset) != NROW(Y))
84	    stop(gettextf("number of offsets is %d should equal %d (number of observations)",
85			  length(offset), NROW(Y)), domain = NA)
86    }
87    ## these allow starting values to be expressed in terms of other vars.
88    mustart <- model.extract(mf, "mustart")
89    etastart <- model.extract(mf, "etastart")
90
91    ## We want to set the name on this call and the one below for the
92    ## sake of messages from the fitter function
93    fit <- eval(call(if(is.function(method)) "method" else method,
94                     x = X, y = Y, weights = weights, start = start,
95                     etastart = etastart, mustart = mustart,
96                     offset = offset, family = family, control = control,
97                     intercept = attr(mt, "intercept") > 0L, singular.ok = singular.ok))
98
99    ## This calculated the null deviance from the intercept-only model
100    ## if there is one, otherwise from the offset-only model.
101    ## We need to recalculate by a proper fit if there is intercept and
102    ## offset.
103    ##
104    ## The glm.fit calculation could be wrong if the link depends on the
105    ## observations, so we allow the null deviance to be forced to be
106    ## re-calculated by setting an offset (provided there is an intercept).
107    ## Prior to 2.4.0 this was only done for non-zero offsets.
108    if(length(offset) && attr(mt, "intercept") > 0L) {
109        fit2 <-
110            eval(call(if(is.function(method)) "method" else method,
111                      x = X[, "(Intercept)", drop=FALSE], y = Y,
112                      ## starting values potentially required (PR#16877):
113                      mustart = fit$fitted.values,
114                      weights = weights, offset = offset, family = family,
115                      control = control, intercept = TRUE))
116        ## That fit might not have converged ....
117        if(!fit2$converged)
118            warning("fitting to calculate the null deviance did not converge -- increase 'maxit'?")
119        fit$null.deviance <- fit2$deviance
120    }
121    if(model) fit$model <- mf
122    fit$na.action <- attr(mf, "na.action")
123    if(x) fit$x <- X
124    if(!y) fit$y <- NULL
125    structure(c(fit,
126		list(call = cal, formula = formula,
127		     terms = mt, data = data,
128		     offset = offset, control = control, method = method,
129		     contrasts = attr(X, "contrasts"),
130		     xlevels = .getXlevels(mt, mf))),
131	      class = c(fit$class, c("glm", "lm")))
132}
133
134
135glm.control <- function(epsilon = 1e-8, maxit = 25, trace = FALSE)
136{
137    if(!is.numeric(epsilon) || epsilon <= 0)
138	stop("value of 'epsilon' must be > 0")
139    if(!is.numeric(maxit) || maxit <= 0)
140	stop("maximum number of iterations must be > 0")
141    list(epsilon = epsilon, maxit = maxit, trace = trace)
142}
143
144## Modified by Thomas Lumley 26 Apr 97
145## Added boundary checks and step halving
146## Modified detection of fitted 0/1 in binomial
147## Updated by KH as suggested by BDR on 1998/06/16
148
149glm.fit <-
150    function (x, y, weights = rep.int(1, nobs), start = NULL,
151	      etastart = NULL, mustart = NULL, offset = rep.int(0, nobs),
152	      family = gaussian(), control = list(), intercept = TRUE,
153	      singular.ok = TRUE)
154{
155    control <- do.call("glm.control", control)
156    x <- as.matrix(x)
157    xnames <- dimnames(x)[[2L]]
158    ynames <- if(is.matrix(y)) rownames(y) else names(y)
159    conv <- FALSE
160    nobs <- NROW(y)
161    nvars <- ncol(x)
162    EMPTY <- nvars == 0
163    ## define weights and offset if needed
164    if (is.null(weights))
165	weights <- rep.int(1, nobs)
166    if (is.null(offset))
167	offset <- rep.int(0, nobs)
168
169    ## get family functions:
170    variance <- family$variance
171    linkinv  <- family$linkinv
172    if (!is.function(variance) || !is.function(linkinv) )
173	stop("'family' argument seems not to be a valid family object", call. = FALSE)
174    dev.resids <- family$dev.resids
175    aic <- family$aic
176    mu.eta <- family$mu.eta
177    valideta <- family$valideta %||% function(eta) TRUE
178    validmu  <- family$validmu  %||% function(mu)  TRUE
179    if(is.null(mustart)) {
180        ## calculates mustart and may change y and weights and set n (!)
181        eval(family$initialize)
182    } else {
183        mukeep <- mustart
184        eval(family$initialize)
185        mustart <- mukeep
186    }
187    if(EMPTY) {
188        eta <- rep.int(0, nobs) + offset
189        if (!valideta(eta))
190            stop("invalid linear predictor values in empty model", call. = FALSE)
191        mu <- linkinv(eta)
192        ## calculate initial deviance and coefficient
193        if (!validmu(mu))
194            stop("invalid fitted means in empty model", call. = FALSE)
195        dev <- sum(dev.resids(y, mu, weights))
196        w <- sqrt((weights * mu.eta(eta)^2)/variance(mu))
197        residuals <- (y - mu)/mu.eta(eta)
198        good <- rep_len(TRUE, length(residuals))
199        boundary <- conv <- TRUE
200        coef <- numeric()
201        iter <- 0L
202    } else {
203        coefold <- NULL
204        eta <- etastart %||% {
205            if(!is.null(start))
206                if (length(start) != nvars)
207                    stop(gettextf(
208                      "length of 'start' should equal %d and correspond to initial coefs for %s",
209                                  nvars, paste(deparse(xnames), collapse=", ")),
210                         domain = NA)
211                else {
212                    coefold <- start
213                    offset + as.vector(if (NCOL(x) == 1L) x * start else x %*% start)
214                }
215            else family$linkfun(mustart)
216        }
217        mu <- linkinv(eta)
218        if (!(validmu(mu) && valideta(eta)))
219            stop("cannot find valid starting values: please specify some", call. = FALSE)
220        ## calculate initial deviance and coefficient
221        devold <- sum(dev.resids(y, mu, weights))
222        boundary <- conv <- FALSE
223
224        ##------------- THE Iteratively Reweighting L.S. iteration -----------
225        for (iter in 1L:control$maxit) {
226            good <- weights > 0
227            varmu <- variance(mu)[good]
228            if (anyNA(varmu))
229                stop("NAs in V(mu)")
230            if (any(varmu == 0))
231                stop("0s in V(mu)")
232            mu.eta.val <- mu.eta(eta)
233            if (any(is.na(mu.eta.val[good])))
234                stop("NAs in d(mu)/d(eta)")
235            ## drop observations for which w will be zero
236            good <- (weights > 0) & (mu.eta.val != 0)
237
238            if (all(!good)) {
239                conv <- FALSE
240                warning(gettextf("no observations informative at iteration %d",
241                                 iter), domain = NA)
242                break
243            }
244            z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
245            w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good])
246            ## call Fortran code via C wrapper
247            fit <- .Call(C_Cdqrls, x[good, , drop = FALSE] * w, z * w,
248                         min(1e-7, control$epsilon/1000), check=FALSE)
249            if (any(!is.finite(fit$coefficients))) {
250                conv <- FALSE
251                warning(gettextf("non-finite coefficients at iteration %d", iter), domain = NA)
252                break
253            }
254            ## stop if not enough parameters
255            if (nobs < fit$rank)
256                stop(sprintf(ngettext(nobs,
257                                      "X matrix has rank %d, but only %d observation",
258                                      "X matrix has rank %d, but only %d observations"),
259                             fit$rank, nobs), domain = NA)
260            if(!singular.ok && fit$rank < nvars) stop("singular fit encountered")
261            ## calculate updated values of eta and mu with the new coef:
262            start[fit$pivot] <- fit$coefficients
263            eta <- drop(x %*% start)
264            mu <- linkinv(eta <- eta + offset)
265            dev <- sum(dev.resids(y, mu, weights))
266            if (control$trace)
267                cat("Deviance = ", dev, " Iterations - ", iter, "\n", sep = "")
268            ## check for divergence
269            boundary <- FALSE
270            if (!is.finite(dev)) {
271                if(is.null(coefold))
272                    stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
273                warning("step size truncated due to divergence", call. = FALSE)
274                ii <- 1
275                while (!is.finite(dev)) {
276                    if (ii > control$maxit)
277                        stop("inner loop 1; cannot correct step size", call. = FALSE)
278                    ii <- ii + 1
279                    start <- (start + coefold)/2
280                    eta <- drop(x %*% start)
281                    mu <- linkinv(eta <- eta + offset)
282                    dev <- sum(dev.resids(y, mu, weights))
283                }
284                boundary <- TRUE
285                if (control$trace)
286                    cat("Step halved: new deviance = ", dev, "\n", sep = "")
287            }
288            ## check for fitted values outside domain.
289            if (!(valideta(eta) && validmu(mu))) {
290                if(is.null(coefold))
291                    stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE)
292                warning("step size truncated: out of bounds", call. = FALSE)
293                ii <- 1
294                while (!(valideta(eta) && validmu(mu))) {
295                    if (ii > control$maxit)
296                        stop("inner loop 2; cannot correct step size", call. = FALSE)
297                    ii <- ii + 1
298                    start <- (start + coefold)/2
299                    eta <- drop(x %*% start)
300                    mu <- linkinv(eta <- eta + offset)
301                }
302                boundary <- TRUE
303                dev <- sum(dev.resids(y, mu, weights))
304                if (control$trace)
305                    cat("Step halved: new deviance = ", dev, "\n", sep = "")
306            }
307            ## check for convergence
308            if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
309                conv <- TRUE
310                coef <- start
311                break
312            } else {
313                devold <- dev
314                coef <- coefold <- start
315            }
316        } ##-------------- end IRLS iteration -------------------------------
317
318        if (!conv)
319            warning("glm.fit: algorithm did not converge", call. = FALSE)
320        if (boundary)
321            warning("glm.fit: algorithm stopped at boundary value", call. = FALSE)
322        eps <- 10*.Machine$double.eps
323        if (family$family == "binomial") {
324            if (any(mu > 1 - eps) || any(mu < eps))
325                warning("glm.fit: fitted probabilities numerically 0 or 1 occurred", call. = FALSE)
326        }
327        if (family$family == "poisson") {
328            if (any(mu < eps))
329                warning("glm.fit: fitted rates numerically 0 occurred", call. = FALSE)
330        }
331        ## If X matrix was not full rank then columns were pivoted,
332        ## hence we need to re-label the names ...
333        ## Original code changed as suggested by BDR---give NA rather
334        ## than 0 for non-estimable parameters
335        if (fit$rank < nvars) coef[fit$pivot][seq.int(fit$rank+1, nvars)] <- NA
336        xxnames <- xnames[fit$pivot]
337        ## update by accurate calculation, including 0-weight cases.
338        residuals <-  (y - mu)/mu.eta(eta)
339##        residuals <- rep.int(NA, nobs)
340##        residuals[good] <- z - (eta - offset)[good] # z does not have offset in.
341        fit$qr <- as.matrix(fit$qr)
342        nr <- min(sum(good), nvars)
343        if (nr < nvars) {
344            Rmat <- diag(nvars)
345            Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars]
346        }
347        else Rmat <- fit$qr[1L:nvars, 1L:nvars]
348        Rmat <- as.matrix(Rmat)
349        Rmat[row(Rmat) > col(Rmat)] <- 0
350        names(coef) <- xnames
351        colnames(fit$qr) <- xxnames
352        dimnames(Rmat) <- list(xxnames, xxnames)
353    }
354    names(residuals) <- ynames
355    names(mu) <- ynames
356    names(eta) <- ynames
357    # for compatibility with lm, which has a full-length weights vector
358    wt <- rep.int(0, nobs)
359    wt[good] <- w^2
360    names(wt) <- ynames
361    names(weights) <- ynames
362    names(y) <- ynames
363    if(!EMPTY)
364        names(fit$effects) <-
365            c(xxnames[seq_len(fit$rank)], rep.int("", sum(good) - fit$rank))
366    ## calculate null deviance -- corrected in glm() if offset and intercept
367    wtdmu <-
368	if (intercept) sum(weights * y)/sum(weights) else linkinv(offset)
369    nulldev <- sum(dev.resids(y, wtdmu, weights))
370    ## calculate df
371    n.ok <- nobs - sum(weights==0)
372    nulldf <- n.ok - as.integer(intercept)
373    rank <- if(EMPTY) 0 else fit$rank
374    resdf  <- n.ok - rank
375    ## calculate AIC
376    aic.model <-
377	aic(y, n, mu, weights, dev) + 2*rank
378	##     ^^ is only initialize()d for "binomial" [yuck!]
379    list(coefficients = coef, residuals = residuals, fitted.values = mu,
380	 effects = if(!EMPTY) fit$effects, R = if(!EMPTY) Rmat, rank = rank,
381	 qr = if(!EMPTY) structure(fit[c("qr", "rank", "qraux", "pivot", "tol")], class = "qr"),
382         family = family,
383	 linear.predictors = eta, deviance = dev, aic = aic.model,
384	 null.deviance = nulldev, iter = iter, weights = wt,
385	 prior.weights = weights, df.residual = resdf, df.null = nulldf,
386	 y = y, converged = conv, boundary = boundary)
387}
388
389
390print.glm <- function(x, digits = max(3L, getOption("digits") - 3L), ...)
391{
392    cat("\nCall:  ",
393	paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
394    if(length(coef(x))) {
395        cat("Coefficients")
396        if(is.character(co <- x$contrasts))
397            cat("  [contrasts: ",
398                apply(cbind(names(co),co), 1L, paste, collapse = "="), "]")
399        cat(":\n")
400        print.default(format(x$coefficients, digits = digits),
401                      print.gap = 2, quote = FALSE)
402    } else cat("No coefficients\n\n")
403    cat("\nDegrees of Freedom:", x$df.null, "Total (i.e. Null); ",
404        x$df.residual, "Residual\n")
405    if(nzchar(mess <- naprint(x$na.action))) cat("  (",mess, ")\n", sep = "")
406    cat("Null Deviance:	   ",	format(signif(x$null.deviance, digits)),
407	"\nResidual Deviance:", format(signif(x$deviance, digits)),
408	"\tAIC:", format(signif(x$aic, digits)))
409    cat("\n")
410    invisible(x)
411}
412
413
414anova.glm <- function(object, ..., dispersion = NULL, test = NULL)
415{
416    ## check for multiple objects
417    dotargs <- list(...)
418    named <- if (is.null(names(dotargs)))
419	rep_len(FALSE, length(dotargs)) else (names(dotargs) != "")
420    if(any(named))
421	warning("the following arguments to 'anova.glm' are invalid and dropped: ",
422		paste(deparse(dotargs[named]), collapse=", "))
423    dotargs <- dotargs[!named]
424    is.glm <- vapply(dotargs,function(x) inherits(x,"glm"), NA)
425    dotargs <- dotargs[is.glm]
426
427    ## do not copy this: anova.glmlist is not an exported object.
428    ## use anova(structure(list(object, dotargs), class = "glmlist"))
429    if (length(dotargs))
430	return(anova.glmlist(c(list(object), dotargs),
431			     dispersion = dispersion, test = test))
432
433    ## score tests require a bit of extra computing
434    doscore <- !is.null(test) && test=="Rao"
435    ## extract variables from model
436
437    varlist <- attr(object$terms, "variables")
438    ## must avoid partial matching here.
439    x <-
440	if (n <- match("x", names(object), 0L))
441	    object[[n]]
442	else model.matrix(object)
443    varseq <- attr(x, "assign")
444    nvars <- max(0, varseq)
445    resdev <- resdf <- NULL
446
447    if (doscore){
448      score <- numeric(nvars)
449      # fit a null model
450      method <- object$method
451      y <- object$y
452      fit <- eval(call(if(is.function(method)) "method" else method,
453                       x=x[, varseq == 0, drop = FALSE],
454                       y=y,
455                       weights=object$prior.weights,
456                       start  =object$start,
457                       offset =object$offset,
458                       family =object$family,
459                       control=object$control))
460      r <- fit$residuals
461      w <- fit$weights
462      icpt <- attr(object$terms, "intercept")
463    }
464
465    ## if there is more than one explanatory variable then
466    ## recall glm.fit to fit variables sequentially
467
468    ## for score tests, we need to do so in any case
469    if(nvars > 1 || doscore) {
470	method <- object$method
471        ## allow for 'y = FALSE' in the call (PR#13098)
472        y <- object$y
473        if(is.null(y)) { ## code from residuals.glm
474            mu.eta <- object$family$mu.eta
475            eta <- object$linear.predictors
476            y <- object$fitted.values + object$residuals * mu.eta(eta)
477        }
478	for(i in seq_len(max(nvars - 1L, 0))) { # nvars == 0 can happen
479	    ## explanatory variables up to i are kept in the model
480	    ## use method from glm to find residual deviance
481	    ## and df for each sequential fit
482	    fit <- eval(call(if(is.function(method)) "method" else method,
483                             x=x[, varseq <= i, drop = FALSE],
484                             y=y,
485                             weights=object$prior.weights,
486                             start  =object$start,
487                             offset =object$offset,
488                             family =object$family,
489                             control=object$control))
490            if (doscore) {
491              zz <- eval(call(if(is.function(method)) "method" else method,
492                             x=x[, varseq <= i, drop = FALSE],
493                             y=r,
494                             weights=w,
495                             intercept=icpt))
496              score[i] <-  zz$null.deviance - zz$deviance
497              r <- fit$residuals
498              w <- fit$weights
499            }
500	    resdev <- c(resdev, fit$deviance)
501	    resdf <- c(resdf, fit$df.residual)
502	}
503        if (doscore) {
504          zz <- eval(call(if(is.function(method)) "method" else method,
505                          x=x,
506                          y=r,
507                          weights=w,
508                          intercept=icpt))
509          score[nvars] <- zz$null.deviance - zz$deviance
510        }
511    }
512
513    ## add values from null and full model
514
515    resdf <- c(object$df.null, resdf, object$df.residual)
516    resdev <- c(object$null.deviance, resdev, object$deviance)
517
518    ## construct table and title
519
520    table <- data.frame(c(NA, -diff(resdf)),
521			c(NA, pmax(0, -diff(resdev))), resdf, resdev)
522    tl <- attr(object$terms, "term.labels")
523    if (length(tl) == 0L) table <- table[1,,drop=FALSE] # kludge for null model
524    dimnames(table) <- list(c("NULL", tl),
525			    c("Df", "Deviance", "Resid. Df", "Resid. Dev"))
526    if (doscore)
527      table <- cbind(table, Rao=c(NA,score))
528    title <- paste0("Analysis of Deviance Table", "\n\nModel: ",
529                    object$family$family, ", link: ", object$family$link,
530                    "\n\nResponse: ", as.character(varlist[-1L])[1L],
531                    "\n\nTerms added sequentially (first to last)\n\n")
532
533    ## calculate test statistics if needed
534
535    df.dispersion <- Inf
536    if(is.null(dispersion)) {
537	dispersion <- summary(object, dispersion=dispersion)$dispersion
538	df.dispersion <- if (dispersion == 1) Inf else object$df.residual
539    }
540    if(!is.null(test)) {
541        if(test == "F" && df.dispersion == Inf) {
542            fam <- object$family$family
543            if(fam == "binomial" || fam == "poisson")
544                warning(gettextf("using F test with a '%s' family is inappropriate",
545                                 fam),
546                        domain = NA)
547            else
548                warning("using F test with a fixed dispersion is inappropriate")
549        }
550	table <- stat.anova(table=table, test=test, scale=dispersion,
551			    df.scale=df.dispersion, n=NROW(x))
552    }
553    structure(table, heading = title, class = c("anova", "data.frame"))
554}
555
556
557anova.glmlist <- function(object, ..., dispersion=NULL, test=NULL)
558{
559
560    doscore <- !is.null(test) && test=="Rao"
561
562    ## find responses for all models and remove
563    ## any models with a different response
564
565    responses <- as.character(lapply(object, function(x) {
566	deparse(formula(x)[[2L]])} ))
567    sameresp <- responses==responses[1L]
568    if(!all(sameresp)) {
569	object <- object[sameresp]
570        warning(gettextf("models with response %s removed because response differs from model 1",
571                         sQuote(deparse(responses[!sameresp]))),
572                domain = NA)
573    }
574
575    ns <- sapply(object, function(x) length(x$residuals))
576    if(any(ns != ns[1L]))
577	stop("models were not all fitted to the same size of dataset")
578
579    ## calculate the number of models
580
581    nmodels <- length(object)
582    if(nmodels==1)
583	return(anova.glm(object[[1L]], dispersion=dispersion, test=test))
584
585    ## extract statistics
586
587    resdf  <- as.numeric(lapply(object, function(x) x$df.residual))
588    resdev <- as.numeric(lapply(object, function(x) x$deviance))
589
590    if (doscore){
591      score <- numeric(nmodels)
592      score[1] <- NA
593      df <- -diff(resdf)
594
595      for (i in seq_len(nmodels-1)) {
596        m1 <- if (df[i] > 0) object[[i]] else object[[i+1]]
597        m2 <- if (df[i] > 0) object[[i+1]] else object[[i]]
598        r <- m1$residuals
599        w <- m1$weights
600        method <- m2$method
601        icpt <- attr(m1$terms, "intercept")
602        zz <- eval(call(if(is.function(method)) "method" else method,
603                        x=model.matrix(m2),
604                        y=r,
605                        weights=w,
606                        intercept=icpt))
607        score[i+1] <-  zz$null.deviance - zz$deviance
608        if (df[i] < 0) score[i+1] <- - score[i+1]
609      }
610    }
611
612    ## construct table and title
613
614    table <- data.frame(resdf, resdev, c(NA, -diff(resdf)),
615			c(NA, -diff(resdev)) )
616    variables <- lapply(object, function(x)
617			paste(deparse(formula(x)), collapse="\n") )
618    dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Resid. Dev", "Df",
619					 "Deviance"))
620    if (doscore)
621      table <- cbind(table, Rao=score)
622
623    title <- "Analysis of Deviance Table\n"
624    topnote <- paste0("Model ", format(1L:nmodels), ": ", variables,
625                      collapse = "\n")
626
627    ## calculate test statistic if needed
628
629    if(!is.null(test)) {
630	bigmodel <- object[[order(resdf)[1L]]]
631	dispersion <- summary(bigmodel, dispersion=dispersion)$dispersion
632	df.dispersion <- if (dispersion == 1) Inf else min(resdf)
633        if(test == "F" && df.dispersion == Inf) {
634            fam <- bigmodel$family$family
635            if(fam == "binomial" || fam == "poisson")
636                warning(gettextf("using F test with a '%s' family is inappropriate",
637                                 fam),
638                        domain = NA, call. = FALSE)
639            else
640                warning("using F test with a fixed dispersion is inappropriate")
641        }
642	table <- stat.anova(table = table, test = test,
643			    scale = dispersion, df.scale = df.dispersion,
644			    n = length(bigmodel$residuals))
645    }
646    structure(table, heading = c(title, topnote),
647	      class = c("anova", "data.frame"))
648}
649
650
651summary.glm <- function(object, dispersion = NULL,
652			correlation = FALSE, symbolic.cor = FALSE, ...)
653{
654    est.disp <- FALSE
655    df.r <- object$df.residual
656    if(is.null(dispersion))	# calculate dispersion if needed
657	dispersion <-
658	    if(object$family$family %in% c("poisson", "binomial"))  1
659	    else if(df.r > 0) {
660                est.disp <- TRUE
661		if(any(object$weights==0))
662		    warning("observations with zero weight not used for calculating dispersion")
663		sum((object$weights*object$residuals^2)[object$weights > 0])/ df.r
664	    } else {
665                est.disp <- TRUE
666                NaN
667            }
668
669    ## calculate scaled and unscaled covariance matrix
670
671    aliased <- is.na(coef(object))  # used in print method
672    p <- object$rank
673    if (p > 0) {
674        p1 <- 1L:p
675	Qr <- qr.lm(object)
676        ## WATCHIT! doesn't this rely on pivoting not permuting 1L:p? -- that's quaranteed
677        coef.p <- object$coefficients[Qr$pivot[p1]]
678        covmat.unscaled <- chol2inv(Qr$qr[p1,p1,drop=FALSE])
679        dimnames(covmat.unscaled) <- list(names(coef.p),names(coef.p))
680        covmat <- dispersion*covmat.unscaled
681        var.cf <- diag(covmat)
682
683        ## calculate coef table
684
685        s.err <- sqrt(var.cf)
686        tvalue <- coef.p/s.err
687
688        dn <- c("Estimate", "Std. Error")
689        if(!est.disp) { # known dispersion
690            pvalue <- 2*pnorm(-abs(tvalue))
691            coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
692            dimnames(coef.table) <- list(names(coef.p),
693                                         c(dn, "z value","Pr(>|z|)"))
694        } else if(df.r > 0) {
695            pvalue <- 2*pt(-abs(tvalue), df.r)
696            coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
697            dimnames(coef.table) <- list(names(coef.p),
698                                         c(dn, "t value","Pr(>|t|)"))
699        } else { # df.r == 0
700            coef.table <- cbind(coef.p, NaN, NaN, NaN)
701            dimnames(coef.table) <- list(names(coef.p),
702                                         c(dn, "t value","Pr(>|t|)"))
703        }
704        df.f <- NCOL(Qr$qr)
705    } else {
706        coef.table <- matrix(, 0L, 4L)
707        dimnames(coef.table) <-
708            list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
709        covmat.unscaled <- covmat <- matrix(, 0L, 0L)
710        df.f <- length(aliased)
711    }
712    ## return answer
713
714    ## these need not all exist, e.g. na.action.
715    keep <- match(c("call","terms","family","deviance", "aic",
716		      "contrasts", "df.residual","null.deviance","df.null",
717                      "iter", "na.action"), names(object), 0L)
718    ans <- c(object[keep],
719	     list(deviance.resid = residuals(object, type = "deviance"),
720		  coefficients = coef.table,
721                  aliased = aliased,
722		  dispersion = dispersion,
723		  df = c(object$rank, df.r, df.f),
724		  cov.unscaled = covmat.unscaled,
725		  cov.scaled = covmat))
726
727    if(correlation && p > 0) {
728	dd <- sqrt(diag(covmat.unscaled))
729	ans$correlation <-
730	    covmat.unscaled/outer(dd,dd)
731	ans$symbolic.cor <- symbolic.cor
732    }
733    class(ans) <- "summary.glm"
734    return(ans)
735}
736
737print.summary.glm <-
738    function (x, digits = max(3L, getOption("digits") - 3L),
739	      symbolic.cor = x$symbolic.cor,
740	      signif.stars = getOption("show.signif.stars"), ...)
741{
742    cat("\nCall:\n",
743	paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
744    cat("Deviance Residuals: \n")
745    if(x$df.residual > 5) {
746	x$deviance.resid <- setNames(quantile(x$deviance.resid, na.rm = TRUE),
747				     c("Min", "1Q", "Median", "3Q", "Max"))
748    }
749    xx <- zapsmall(x$deviance.resid, digits + 1L)
750    print.default(xx, digits = digits, na.print = "", print.gap = 2L)
751
752    if(length(x$aliased) == 0L) {
753        cat("\nNo Coefficients\n")
754    } else {
755        ## df component added in 1.8.0
756        ## partial matching problem here.
757        df <- if ("df" %in% names(x)) x[["df"]] else NULL
758        if (!is.null(df) && (nsingular <- df[3L] - df[1L]))
759            cat("\nCoefficients: (", nsingular,
760                " not defined because of singularities)\n", sep = "")
761        else cat("\nCoefficients:\n")
762        coefs <- x$coefficients
763        if(!is.null(aliased <- x$aliased) && any(aliased)) {
764            cn <- names(aliased)
765            coefs <- matrix(NA, length(aliased), 4L,
766                            dimnames=list(cn, colnames(coefs)))
767            coefs[!aliased, ] <- x$coefficients
768        }
769        printCoefmat(coefs, digits = digits, signif.stars = signif.stars,
770                     na.print = "NA", ...)
771    }
772    ##
773    cat("\n(Dispersion parameter for ", x$family$family,
774	" family taken to be ", format(x$dispersion), ")\n\n",
775	apply(cbind(paste(format(c("Null","Residual"), justify="right"),
776                          "deviance:"),
777		    format(unlist(x[c("null.deviance","deviance")]),
778			   digits = max(5L, digits + 1L)), " on",
779		    format(unlist(x[c("df.null","df.residual")])),
780		    " degrees of freedom\n"),
781	      1L, paste, collapse = " "), sep = "")
782    if(nzchar(mess <- naprint(x$na.action))) cat("  (", mess, ")\n", sep = "")
783    cat("AIC: ", format(x$aic, digits = max(4L, digits + 1L)),"\n\n",
784	"Number of Fisher Scoring iterations: ", x$iter,
785	"\n", sep = "")
786
787    correl <- x$correlation
788    if(!is.null(correl)) {
789# looks most sensible not to give NAs for undefined coefficients
790#         if(!is.null(aliased) && any(aliased)) {
791#             nc <- length(aliased)
792#             correl <- matrix(NA, nc, nc, dimnames = list(cn, cn))
793#             correl[!aliased, !aliased] <- x$correl
794#         }
795	p <- NCOL(correl)
796	if(p > 1) {
797	    cat("\nCorrelation of Coefficients:\n")
798	    if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects
799		print(symnum(correl, abbr.colnames = NULL))
800	    } else {
801		correl <- format(round(correl, 2L), nsmall = 2L,
802                                 digits = digits)
803		correl[!lower.tri(correl)] <- ""
804		print(correl[-1, -p, drop=FALSE], quote = FALSE)
805	    }
806	}
807    }
808    cat("\n")
809    invisible(x)
810}
811
812
813## GLM Methods for Generic Functions :
814
815## needed to avoid deviance.lm
816deviance.glm <- function(object, ...) object$deviance
817effects.glm <- function(object, ...) object$effects
818family.glm <- function(object, ...) object$family
819
820residuals.glm <-
821    function(object,
822	     type = c("deviance", "pearson", "working", "response", "partial"),
823	     ...)
824{
825    type <- match.arg(type)
826    y <- object$y
827    r <- object$residuals
828    mu <- object$fitted.values
829    wts <- object$prior.weights
830    switch(type,
831           deviance=,pearson=,response=
832           if(is.null(y)) {
833               mu.eta <- object$family$mu.eta
834               eta <- object$linear.predictors
835               y <-  mu + r * mu.eta(eta)
836           })
837    res <- switch(type,
838		  deviance = if(object$df.residual > 0) {
839		      d.res <- sqrt(pmax((object$family$dev.resids)(y, mu, wts), 0))
840		      ifelse(y > mu, d.res, -d.res)
841		  } else rep.int(0, length(mu)),
842		  pearson = (y-mu)*sqrt(wts)/sqrt(object$family$variance(mu)),
843		  working = r,
844		  response = y - mu,
845		  partial = r
846		  )
847    if(!is.null(object$na.action))
848        res <- naresid(object$na.action, res)
849    if (type == "partial") ## need to avoid doing naresid() twice.
850        res <- res+predict(object, type="terms")
851    res
852}
853
854## For influence.glm() ... --> ./lm.influence.R
855
856## KH on 1998/06/22: update.default() is now used ...
857
858model.frame.glm <- function (formula, ...)
859{
860    dots <- list(...)
861    nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0L)]
862    if (length(nargs) || is.null(formula$model)) {
863	fcall <- formula$call
864	fcall$method <- "model.frame"
865        ## need stats:: for non-standard evaluation
866	fcall[[1L]] <- quote(stats::glm)
867        fcall[names(nargs)] <- nargs
868	env <- environment(formula$terms) %||% parent.frame()
869	eval(fcall, env)
870    }
871    else formula$model
872}
873
874weights.glm <- function(object, type = c("prior", "working"), ...)
875{
876    type <- match.arg(type)
877    res <- if(type == "prior") object$prior.weights else object$weights
878    if(is.null(object$na.action)) res
879    else naresid(object$na.action, res)
880}
881
882formula.glm <- function(x, ...)
883{
884    form <- x$formula
885    if( !is.null(form) ) {
886        form <- formula(x$terms) # has . expanded
887        environment(form) <- environment(x$formula)
888        form
889    } else formula(x$terms)
890}
891