1#  File src/library/stats/R/loglin.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2013 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19loglin <- function(table, margin, start = rep(1, length(table)), fit =
20                   FALSE, eps = 0.1, iter = 20L, param = FALSE, print =
21                   TRUE) {
22    rfit <- fit
23
24    dtab <- dim(table)
25    nvar <- length(dtab)
26
27    ncon <- length(margin)
28    conf <- matrix(0L, nrow = nvar, ncol = ncon)
29    nmar <- 0
30    varnames <- names(dimnames(table))
31    for (k in seq_along(margin)) {
32        tmp <- margin[[k]]
33        if (is.character(tmp)) {
34            ## Rewrite margin names to numbers
35            tmp <- match(tmp, varnames)
36            margin[[k]] <- tmp
37        }
38        if (!is.numeric(tmp) || any(is.na(tmp) | tmp <= 0))
39            stop("'margin' must contain names or numbers corresponding to 'table'")
40        conf[seq_along(tmp), k] <- tmp
41        nmar <- nmar + prod(dtab[tmp])
42    }
43
44    ntab <- length(table)
45    if (length(start) != ntab ) stop("'start' and 'table' must be same length")
46
47    z <- .Call(C_LogLin, dtab, conf, table, start, nmar, eps, iter)
48
49    if (print)
50        cat(z$nlast, "iterations: deviation", z$dev[z$nlast], "\n")
51
52    fit <- z$fit
53    attributes(fit) <- attributes(table)
54
55    ## Pearson chi-sq test statistic
56    observed <- as.vector(table[start > 0])
57    expected <- as.vector(fit[start > 0])
58    pearson <- sum((observed - expected)^2 / expected)
59
60    ## Likelihood Ratio Test statistic
61    observed <- as.vector(table[table * fit > 0])
62    expected <- as.vector(fit[table * fit > 0])
63    lrt <- 2 * sum(observed * log(observed / expected))
64
65    ## Compute degrees of freedom.
66    ## Use a dyadic-style representation for the (possible) subsets B.
67    ## Let u_i(B) = 1 if i is contained in B and 0 otherwise.  Then B
68    ## <-> u(B) = (u_1(B),...,u_N(B)) <-> \sum_{i=1}^N u_i(B) 2^{i-1}.
69    ## See also the code for 'dyadic' below which computes the u_i(B).
70    subsets <- function(x) {
71        y <- list(vector(mode(x), length = 0))
72        for (i in seq_along(x)) {
73            y <- c(y, lapply(y, "c", x[i]))
74        }
75        y[-1L]
76    }
77    df <- rep.int(0, 2^nvar)
78    for (k in seq_along(margin)) {
79        terms <- subsets(margin[[k]])
80        for (j in seq_along(terms))
81            df[sum(2 ^ (terms[[j]] - 1))] <- prod(dtab[terms[[j]]] - 1)
82    }
83
84    ## Rewrite margin numbers to names if possible
85    if (!is.null(varnames) && all(nzchar(varnames))) {
86        for (k in seq_along(margin))
87            margin[[k]] <- varnames[margin[[k]]]
88    } else {
89        varnames <- as.character(1 : ntab)
90    }
91
92    y <- list(lrt = lrt,
93              pearson = pearson,
94              df = ntab - sum(df) - 1,
95              margin = margin)
96
97    if (rfit)
98        y$fit <- fit
99
100    if (param) {
101        fit <- log(fit)
102        terms <- seq_along(df)[df > 0]
103
104        parlen <- length(terms) + 1
105        parval <- list(parlen)
106        parnam <- character(parlen)
107
108        parval[[1L]] <- mean(fit)
109        parnam[1L] <- "(Intercept)"
110        fit <- fit - parval[[1L]]
111
112        ## Get the u_i(B) in the rows of 'dyadic', see above.
113        dyadic <- NULL
114        while(any(terms > 0)) {
115            dyadic <- cbind(dyadic, terms %% 2)
116            terms <- terms %/% 2
117        }
118        dyadic <- dyadic[order(rowSums(dyadic)), , drop = FALSE]
119
120        for (i in 2 : parlen) {
121            vars <- which(dyadic[i - 1, ] > 0)
122            parval[[i]] <- apply(fit, vars, mean)
123            parnam[i] <- paste(varnames[vars], collapse = ".")
124            fit <- sweep(fit, vars, parval[[i]], check.margin=FALSE)
125        }
126
127        names(parval) <- parnam
128        y$param <- parval
129    }
130
131    return(y)
132}
133