1splitMatrix <- function (x, f, drop=FALSE) {
2    lapply(split(seq_len(nrow(x)), f, drop = drop),
3           function(ind) x[ind, , drop = FALSE])
4}
5
6fixEvent <- function(event)
7{
8    ### Convert outcome in clogit model to 0/1 binary coding
9
10    if (any(is.na(event)))
11        stop("Event contains missing values")
12
13    if (is.logical(event)) {
14        status <- is.numeric(event)
15    }
16    else if (is.numeric(event)) {
17        status <- if (max(event) == 2) event - 1  else event
18        temp <- (status == 0 | status == 1)
19        if (!all(temp)) {
20            warning("If outcome is numeric then it must be coded 0/1 or 1/2")
21        }
22    }
23    else if (is.factor(event)) {
24        if (nlevels(event) != 2)
25            stop("If outcome is a factor then it must have 2 levels")
26        status <- event == levels(event)[2]
27    }
28    return(as.integer(status))
29}
30
31isInformative <- function(Xsplit, ysplit, strata)
32{
33    ## Identify which observations are informative in a conditional
34    ## logistic regression.
35
36    is.homogeneous <- function(x) { all(x==x[1]) }
37    y.bad <- sapply(ysplit, is.homogeneous)
38    X.bad <- sapply(Xsplit, function(x) { all(apply(x, 2, is.homogeneous)) })
39
40    is.informative <- vector("list", length(ysplit))
41    for (i in seq(along=is.informative)) {
42        canuse <- (!y.bad[i]) && (!X.bad[i])
43        is.informative[[i]] <- rep(canuse, length(ysplit[[i]]))
44    }
45    return(unsplit(is.informative, strata))
46}
47
48fitClogit <- function(X, y, offset, strata, init, iter.max, eps, toler.chol)
49{
50    ## Safe wrapper around the C function "clogit" that ensures all
51    ## arguments have the correct type and storage mode.
52
53    y <- fixEvent(y)
54    if (!is.matrix(X)) {
55        X <- as.matrix(X)
56    }
57    if (!is.double(X)) {
58        X <- matrix(as.double(X), nrow(X), ncol(X))
59    }
60    if (is.null(offset)) {
61        offset <- rep(0, nrow(X))
62    }
63    offset <- as.double(offset);
64
65    ## Split into strata
66    Xsplit <- splitMatrix(X, strata, drop=TRUE)
67    ysplit <- split(y, strata, drop=TRUE)
68    osplit <- split(offset, strata, drop=TRUE)
69
70    info <- isInformative(Xsplit, ysplit, strata)
71    if (!any(info)) {
72        stop("There are no informative observations")
73    }
74
75    ans <- .Call(C_clogit, Xsplit, ysplit, osplit, as.double(init),
76                 as.integer(iter.max), as.double(eps), as.double(toler.chol))
77    ans$informative <- info
78    return(ans)
79}
80
81clogistic <- function (formula, strata, data, subset, na.action,
82                       init, model = TRUE, x = FALSE, y = TRUE,
83                       contrasts = NULL, iter.max=20, eps=1e-6,
84                       toler.chol = sqrt(.Machine$double.eps))
85{
86    ## User interface, edited version of glm
87
88    call <- match.call()
89    if (missing(data))
90        data <- environment(formula)
91    mf <- match.call(expand.dots = FALSE)
92    m <- match(c("formula", "data", "subset",
93                 "na.action", "offset", "strata"), names(mf), 0L)
94    mf <- mf[c(1, m)]
95    mf$drop.unused.levels <- TRUE
96    mf[[1L]] <- as.name("model.frame")
97    mf <- eval(mf, parent.frame())
98
99    mt <- attr(mf, "terms")
100    Y <- model.response(mf, "any")
101    if (is.null(Y)) stop("missing outcome")
102    if (length(dim(Y)) == 1L) {
103        nm <- rownames(Y)
104        dim(Y) <- NULL
105        if (!is.null(nm))
106            names(Y) <- nm
107    }
108    X <- if (!is.empty.model(mt))
109        model.matrix(mt, mf, contrasts)
110    else stop("Invalid model matrix")
111    offset <- as.vector(model.offset(mf))
112    if (!is.null(offset)) {
113        if (length(offset) != NROW(Y))
114            stop(gettextf("number of offsets is %d should equal %d (number of observations)",
115                length(offset), NROW(Y)), domain = NA)
116    }
117
118    strata <- model.extract(mf, "strata")
119    if (is.null(strata)) stop("argument 'strata' missing")
120
121    contrasts <- attr(X, "contrasts")
122    if (attr(mt, "intercept") > 0) {
123        X <- X[,-1, drop=FALSE]
124    }
125    if (missing(init))
126        init <- rep(0, ncol(X))
127
128    if (iter.max < 0)
129        stop("'iter.max' must be non-negative")
130    if (eps <= 0)
131        stop("'eps' must be positive")
132    if (toler.chol <= 0)
133        stop("'toler.chol' must be positive")
134    if (eps < toler.chol)
135        stop("'toler.chol' must be smaller than 'eps'")
136
137    fit <- fitClogit(X = X, y = Y, offset = offset, strata=strata, init=init,
138                     toler.chol=toler.chol, eps=eps, iter.max=iter.max)
139    if (fit$flag <= 0) {
140        stop("Information matrix is not positive definite")
141    }
142    else if (fit$flag == 1000) {
143        warning("Iteration limit exceeded")
144    }
145
146    nvar <- length(init)
147    which.sing <- if (fit$flag < nvar) {
148        diag(fit$var)==0
149    } else {
150        rep(FALSE, nvar)
151    }
152    fit$coefficients[which.sing] <- NA
153    fit$flag <- NULL
154
155    ## Add back in parameter names
156    cfnames <- colnames(X)
157    names(fit$coefficients) <- cfnames
158    dimnames(fit$var) <- list(cfnames, cfnames)
159
160    fit$n <- sum(fit$informative)
161    if (model) {
162        fit$model <- mf
163    }
164    else {
165        ## Without model frame this cannot be interpreted
166        fit$informative <- NULL
167    }
168    fit$na.action <- attr(mf, "na.action")
169    if (x)
170        fit$x <- X
171    if (!y)
172        fit$y <- NULL
173    fit <- c(fit, list(call = call, formula = formula, terms = mt,
174                       contrasts = contrasts, xlevels = .getXlevels(mt, mf)))
175    class(fit) <- c("clogistic")
176    fit
177}
178
179coef.clogistic <- function(object,...) { object$coefficients }
180
181vcov.clogistic <- function(object, ...) { object$var }
182
183print.clogistic <- function (x, digits = max(options()$digits - 4, 3), ...)
184{
185    ## Print method for clogistic objects, edited from print.coxph
186
187    cat("\nCall: ", deparse(x$call), "\n\n", sep="\n")
188
189    savedig <- options(digits = digits)
190    on.exit(options(savedig))
191    coef <- coef.clogistic(x)
192    se <- sqrt(diag(vcov.clogistic(x)))
193    if (is.null(coef) | is.null(se))
194        stop("Input is not valid")
195
196    coefmat <- cbind(coef, exp(coef), se, coef/se,
197                     signif(1 - pchisq((coef/se)^2, 1), digits - 1))
198    dimnames(coefmat) <- list(names(coef),
199                              c("coef", "exp(coef)", "se(coef)", "z", "p"))
200    cat("\n")
201    prmatrix(coefmat)
202    logtest <- -2 * (x$loglik[1] - x$loglik[2])
203    if (is.null(x$df))
204        df <- sum(!is.na(coef))
205    else df <- round(sum(x$df), 2)
206    cat("\n")
207    cat("Likelihood ratio test=", format(round(logtest, 2)),
208        "  on ", df, " df,", " p=", format(1 - pchisq(logtest, df)),
209        ", n=", x$n, sep = "")
210    cat("\n")
211    invisible()
212}
213