1# File src/library/stats/R/loglin.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2013 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19loglin <- function(table, margin, start = rep(1, length(table)), fit = 20 FALSE, eps = 0.1, iter = 20L, param = FALSE, print = 21 TRUE) { 22 rfit <- fit 23 24 dtab <- dim(table) 25 nvar <- length(dtab) 26 27 ncon <- length(margin) 28 conf <- matrix(0L, nrow = nvar, ncol = ncon) 29 nmar <- 0 30 varnames <- names(dimnames(table)) 31 for (k in seq_along(margin)) { 32 tmp <- margin[[k]] 33 if (is.character(tmp)) { 34 ## Rewrite margin names to numbers 35 tmp <- match(tmp, varnames) 36 margin[[k]] <- tmp 37 } 38 if (!is.numeric(tmp) || any(is.na(tmp) | tmp <= 0)) 39 stop("'margin' must contain names or numbers corresponding to 'table'") 40 conf[seq_along(tmp), k] <- tmp 41 nmar <- nmar + prod(dtab[tmp]) 42 } 43 44 ntab <- length(table) 45 if (length(start) != ntab ) stop("'start' and 'table' must be same length") 46 47 z <- .Call(C_LogLin, dtab, conf, table, start, nmar, eps, iter) 48 49 if (print) 50 cat(z$nlast, "iterations: deviation", z$dev[z$nlast], "\n") 51 52 fit <- z$fit 53 attributes(fit) <- attributes(table) 54 55 ## Pearson chi-sq test statistic 56 observed <- as.vector(table[start > 0]) 57 expected <- as.vector(fit[start > 0]) 58 pearson <- sum((observed - expected)^2 / expected) 59 60 ## Likelihood Ratio Test statistic 61 observed <- as.vector(table[table * fit > 0]) 62 expected <- as.vector(fit[table * fit > 0]) 63 lrt <- 2 * sum(observed * log(observed / expected)) 64 65 ## Compute degrees of freedom. 66 ## Use a dyadic-style representation for the (possible) subsets B. 67 ## Let u_i(B) = 1 if i is contained in B and 0 otherwise. Then B 68 ## <-> u(B) = (u_1(B),...,u_N(B)) <-> \sum_{i=1}^N u_i(B) 2^{i-1}. 69 ## See also the code for 'dyadic' below which computes the u_i(B). 70 subsets <- function(x) { 71 y <- list(vector(mode(x), length = 0)) 72 for (i in seq_along(x)) { 73 y <- c(y, lapply(y, "c", x[i])) 74 } 75 y[-1L] 76 } 77 df <- rep.int(0, 2^nvar) 78 for (k in seq_along(margin)) { 79 terms <- subsets(margin[[k]]) 80 for (j in seq_along(terms)) 81 df[sum(2 ^ (terms[[j]] - 1))] <- prod(dtab[terms[[j]]] - 1) 82 } 83 84 ## Rewrite margin numbers to names if possible 85 if (!is.null(varnames) && all(nzchar(varnames))) { 86 for (k in seq_along(margin)) 87 margin[[k]] <- varnames[margin[[k]]] 88 } else { 89 varnames <- as.character(1 : ntab) 90 } 91 92 y <- list(lrt = lrt, 93 pearson = pearson, 94 df = ntab - sum(df) - 1, 95 margin = margin) 96 97 if (rfit) 98 y$fit <- fit 99 100 if (param) { 101 fit <- log(fit) 102 terms <- seq_along(df)[df > 0] 103 104 parlen <- length(terms) + 1 105 parval <- list(parlen) 106 parnam <- character(parlen) 107 108 parval[[1L]] <- mean(fit) 109 parnam[1L] <- "(Intercept)" 110 fit <- fit - parval[[1L]] 111 112 ## Get the u_i(B) in the rows of 'dyadic', see above. 113 dyadic <- NULL 114 while(any(terms > 0)) { 115 dyadic <- cbind(dyadic, terms %% 2) 116 terms <- terms %/% 2 117 } 118 dyadic <- dyadic[order(rowSums(dyadic)), , drop = FALSE] 119 120 for (i in 2 : parlen) { 121 vars <- which(dyadic[i - 1, ] > 0) 122 parval[[i]] <- apply(fit, vars, mean) 123 parnam[i] <- paste(varnames[vars], collapse = ".") 124 fit <- sweep(fit, vars, parval[[i]], check.margin=FALSE) 125 } 126 127 names(parval) <- parnam 128 y$param <- parval 129 } 130 131 return(y) 132} 133