1splitMatrix <- function (x, f, drop=FALSE) { 2 lapply(split(seq_len(nrow(x)), f, drop = drop), 3 function(ind) x[ind, , drop = FALSE]) 4} 5 6fixEvent <- function(event) 7{ 8 ### Convert outcome in clogit model to 0/1 binary coding 9 10 if (any(is.na(event))) 11 stop("Event contains missing values") 12 13 if (is.logical(event)) { 14 status <- is.numeric(event) 15 } 16 else if (is.numeric(event)) { 17 status <- if (max(event) == 2) event - 1 else event 18 temp <- (status == 0 | status == 1) 19 if (!all(temp)) { 20 warning("If outcome is numeric then it must be coded 0/1 or 1/2") 21 } 22 } 23 else if (is.factor(event)) { 24 if (nlevels(event) != 2) 25 stop("If outcome is a factor then it must have 2 levels") 26 status <- event == levels(event)[2] 27 } 28 return(as.integer(status)) 29} 30 31isInformative <- function(Xsplit, ysplit, strata) 32{ 33 ## Identify which observations are informative in a conditional 34 ## logistic regression. 35 36 is.homogeneous <- function(x) { all(x==x[1]) } 37 y.bad <- sapply(ysplit, is.homogeneous) 38 X.bad <- sapply(Xsplit, function(x) { all(apply(x, 2, is.homogeneous)) }) 39 40 is.informative <- vector("list", length(ysplit)) 41 for (i in seq(along=is.informative)) { 42 canuse <- (!y.bad[i]) && (!X.bad[i]) 43 is.informative[[i]] <- rep(canuse, length(ysplit[[i]])) 44 } 45 return(unsplit(is.informative, strata)) 46} 47 48fitClogit <- function(X, y, offset, strata, init, iter.max, eps, toler.chol) 49{ 50 ## Safe wrapper around the C function "clogit" that ensures all 51 ## arguments have the correct type and storage mode. 52 53 y <- fixEvent(y) 54 if (!is.matrix(X)) { 55 X <- as.matrix(X) 56 } 57 if (!is.double(X)) { 58 X <- matrix(as.double(X), nrow(X), ncol(X)) 59 } 60 if (is.null(offset)) { 61 offset <- rep(0, nrow(X)) 62 } 63 offset <- as.double(offset); 64 65 ## Split into strata 66 Xsplit <- splitMatrix(X, strata, drop=TRUE) 67 ysplit <- split(y, strata, drop=TRUE) 68 osplit <- split(offset, strata, drop=TRUE) 69 70 info <- isInformative(Xsplit, ysplit, strata) 71 if (!any(info)) { 72 stop("There are no informative observations") 73 } 74 75 ans <- .Call(C_clogit, Xsplit, ysplit, osplit, as.double(init), 76 as.integer(iter.max), as.double(eps), as.double(toler.chol)) 77 ans$informative <- info 78 return(ans) 79} 80 81clogistic <- function (formula, strata, data, subset, na.action, 82 init, model = TRUE, x = FALSE, y = TRUE, 83 contrasts = NULL, iter.max=20, eps=1e-6, 84 toler.chol = sqrt(.Machine$double.eps)) 85{ 86 ## User interface, edited version of glm 87 88 call <- match.call() 89 if (missing(data)) 90 data <- environment(formula) 91 mf <- match.call(expand.dots = FALSE) 92 m <- match(c("formula", "data", "subset", 93 "na.action", "offset", "strata"), names(mf), 0L) 94 mf <- mf[c(1, m)] 95 mf$drop.unused.levels <- TRUE 96 mf[[1L]] <- as.name("model.frame") 97 mf <- eval(mf, parent.frame()) 98 99 mt <- attr(mf, "terms") 100 Y <- model.response(mf, "any") 101 if (is.null(Y)) stop("missing outcome") 102 if (length(dim(Y)) == 1L) { 103 nm <- rownames(Y) 104 dim(Y) <- NULL 105 if (!is.null(nm)) 106 names(Y) <- nm 107 } 108 X <- if (!is.empty.model(mt)) 109 model.matrix(mt, mf, contrasts) 110 else stop("Invalid model matrix") 111 offset <- as.vector(model.offset(mf)) 112 if (!is.null(offset)) { 113 if (length(offset) != NROW(Y)) 114 stop(gettextf("number of offsets is %d should equal %d (number of observations)", 115 length(offset), NROW(Y)), domain = NA) 116 } 117 118 strata <- model.extract(mf, "strata") 119 if (is.null(strata)) stop("argument 'strata' missing") 120 121 contrasts <- attr(X, "contrasts") 122 if (attr(mt, "intercept") > 0) { 123 X <- X[,-1, drop=FALSE] 124 } 125 if (missing(init)) 126 init <- rep(0, ncol(X)) 127 128 if (iter.max < 0) 129 stop("'iter.max' must be non-negative") 130 if (eps <= 0) 131 stop("'eps' must be positive") 132 if (toler.chol <= 0) 133 stop("'toler.chol' must be positive") 134 if (eps < toler.chol) 135 stop("'toler.chol' must be smaller than 'eps'") 136 137 fit <- fitClogit(X = X, y = Y, offset = offset, strata=strata, init=init, 138 toler.chol=toler.chol, eps=eps, iter.max=iter.max) 139 if (fit$flag <= 0) { 140 stop("Information matrix is not positive definite") 141 } 142 else if (fit$flag == 1000) { 143 warning("Iteration limit exceeded") 144 } 145 146 nvar <- length(init) 147 which.sing <- if (fit$flag < nvar) { 148 diag(fit$var)==0 149 } else { 150 rep(FALSE, nvar) 151 } 152 fit$coefficients[which.sing] <- NA 153 fit$flag <- NULL 154 155 ## Add back in parameter names 156 cfnames <- colnames(X) 157 names(fit$coefficients) <- cfnames 158 dimnames(fit$var) <- list(cfnames, cfnames) 159 160 fit$n <- sum(fit$informative) 161 if (model) { 162 fit$model <- mf 163 } 164 else { 165 ## Without model frame this cannot be interpreted 166 fit$informative <- NULL 167 } 168 fit$na.action <- attr(mf, "na.action") 169 if (x) 170 fit$x <- X 171 if (!y) 172 fit$y <- NULL 173 fit <- c(fit, list(call = call, formula = formula, terms = mt, 174 contrasts = contrasts, xlevels = .getXlevels(mt, mf))) 175 class(fit) <- c("clogistic") 176 fit 177} 178 179coef.clogistic <- function(object,...) { object$coefficients } 180 181vcov.clogistic <- function(object, ...) { object$var } 182 183print.clogistic <- function (x, digits = max(options()$digits - 4, 3), ...) 184{ 185 ## Print method for clogistic objects, edited from print.coxph 186 187 cat("\nCall: ", deparse(x$call), "\n\n", sep="\n") 188 189 savedig <- options(digits = digits) 190 on.exit(options(savedig)) 191 coef <- coef.clogistic(x) 192 se <- sqrt(diag(vcov.clogistic(x))) 193 if (is.null(coef) | is.null(se)) 194 stop("Input is not valid") 195 196 coefmat <- cbind(coef, exp(coef), se, coef/se, 197 signif(1 - pchisq((coef/se)^2, 1), digits - 1)) 198 dimnames(coefmat) <- list(names(coef), 199 c("coef", "exp(coef)", "se(coef)", "z", "p")) 200 cat("\n") 201 prmatrix(coefmat) 202 logtest <- -2 * (x$loglik[1] - x$loglik[2]) 203 if (is.null(x$df)) 204 df <- sum(!is.na(coef)) 205 else df <- round(sum(x$df), 2) 206 cat("\n") 207 cat("Likelihood ratio test=", format(round(logtest, 2)), 208 " on ", df, " df,", " p=", format(1 - pchisq(logtest, df)), 209 ", n=", x$n, sep = "") 210 cat("\n") 211 invisible() 212} 213