stats/R/loglin.R

#  File src/library/stats/R/loglin.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2013 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

loglin <- function(table, margin, start = rep(1, length(table)), fit =
                   FALSE, eps = 0.1, iter = 20L, param = FALSE, print =
                   TRUE) {
    rfit <- fit

    dtab <- dim(table)
    nvar <- length(dtab)

    ncon <- length(margin)
    conf <- matrix(0L, nrow = nvar, ncol = ncon)
    nmar <- 0
    varnames <- names(dimnames(table))
    for (k in seq_along(margin)) {
        tmp <- margin[[k]]
        if (is.character(tmp)) {
            ## Rewrite margin names to numbers
            tmp <- match(tmp, varnames)
            margin[[k]] <- tmp
        }
        if (!is.numeric(tmp) || any(is.na(tmp) | tmp <= 0))
            stop("'margin' must contain names or numbers corresponding to 'table'")
        conf[seq_along(tmp), k] <- tmp
        nmar <- nmar + prod(dtab[tmp])
    }

    ntab <- length(table)
    if (length(start) != ntab ) stop("'start' and 'table' must be same length")

    z <- .Call(C_LogLin, dtab, conf, table, start, nmar, eps, iter)

    if (print)
        cat(z$nlast, "iterations: deviation", z$dev[z$nlast], "\n")

    fit <- z$fit
    attributes(fit) <- attributes(table)

    ## Pearson chi-sq test statistic
    observed <- as.vector(table[start > 0])
    expected <- as.vector(fit[start > 0])
    pearson <- sum((observed - expected)^2 / expected)

    ## Likelihood Ratio Test statistic
    observed <- as.vector(table[table * fit > 0])
    expected <- as.vector(fit[table * fit > 0])
    lrt <- 2 * sum(observed * log(observed / expected))

    ## Compute degrees of freedom.
    ## Use a dyadic-style representation for the (possible) subsets B.
    ## Let u_i(B) = 1 if i is contained in B and 0 otherwise.  Then B
    ## <-> u(B) = (u_1(B),...,u_N(B)) <-> \sum_{i=1}^N u_i(B) 2^{i-1}.
    ## See also the code for 'dyadic' below which computes the u_i(B).
    subsets <- function(x) {
        y <- list(vector(mode(x), length = 0))
        for (i in seq_along(x)) {
            y <- c(y, lapply(y, "c", x[i]))
        }
        y[-1L]
    }
    df <- rep.int(0, 2^nvar)
    for (k in seq_along(margin)) {
        terms <- subsets(margin[[k]])
        for (j in seq_along(terms))
            df[sum(2 ^ (terms[[j]] - 1))] <- prod(dtab[terms[[j]]] - 1)
    }

    ## Rewrite margin numbers to names if possible
    if (!is.null(varnames) && all(nzchar(varnames))) {
        for (k in seq_along(margin))
            margin[[k]] <- varnames[margin[[k]]]
    } else {
        varnames <- as.character(1 : ntab)
    }

    y <- list(lrt = lrt,
              pearson = pearson,
              df = ntab - sum(df) - 1,
              margin = margin)

    if (rfit)
        y$fit <- fit

    if (param) {
        fit <- log(fit)
        terms <- seq_along(df)[df > 0]

        parlen <- length(terms) + 1
        parval <- list(parlen)
        parnam <- character(parlen)

        parval[[1L]] <- mean(fit)
        parnam[1L] <- "(Intercept)"
        fit <- fit - parval[[1L]]

        ## Get the u_i(B) in the rows of 'dyadic', see above.
        dyadic <- NULL
        while(any(terms > 0)) {
            dyadic <- cbind(dyadic, terms %% 2)
            terms <- terms %/% 2
        }
        dyadic <- dyadic[order(rowSums(dyadic)), , drop = FALSE]

        for (i in 2 : parlen) {
            vars <- which(dyadic[i - 1, ] > 0)
            parval[[i]] <- apply(fit, vars, mean)
            parnam[i] <- paste(varnames[vars], collapse = ".")
            fit <- sweep(fit, vars, parval[[i]], check.margin=FALSE)
        }

        names(parval) <- parnam
        y$param <- parval
    }

    return(y)
}