1goodfit <- function(x, type = c("poisson", "binomial", "nbinomial"),
2                    method = c("ML", "MinChisq"), par = NULL)
3{
4    if(is.vector(x)) {
5        x <- table(x)
6    }
7    if(is.table(x)) {
8        if(length(dim(x)) > 1) stop ("x must be a 1-way table")
9        freq <- as.vector(x)
10        count <- as.numeric(names(x))
11    } else {
12          if(!(!is.null(ncol(x)) && ncol(x) == 2))
13              stop("x must be a 2-column matrix or data.frame")
14          freq <- as.vector(x[,1])
15          count <- as.vector(x[,2])
16      }
17
18    ## fill-in possibly missing cells
19    nfreq <- rep(0, max(count) + 1)
20    nfreq[count + 1] <- freq
21    freq <- nfreq
22    count <- 0:max(count)
23    n <- length(count)
24
25    ## starting value for degrees of freedom
26    df <- -1
27
28    type <- match.arg(type)
29    method <- match.arg(method)
30
31    switch(type,
32
33           "poisson" = {
34
35               if(!is.null(par)) {
36                   if(!is.list(par))
37                       stop("`par' must be a named list")
38                   if(names(par) != "lambda")
39                       stop("`par' must specify `lambda'")
40
41                   par <- par$lambda
42                   method <- "fixed"
43               }
44               else if(method == "ML") {
45                   df <- df - 1
46                   par <- weighted.mean(count,freq)
47               }
48               else if(method == "MinChisq") {
49                   df <- df - 1
50
51                   chi2 <- function(x)
52                   {
53                   p.hat <- diff(c(0, ppois(count[-n], lambda = x), 1))
54                   expected <- sum(freq) * p.hat
55                   sum((freq - expected)^2/expected)
56                   }
57
58                   par <- optimize(chi2, range(count))$minimum
59               }
60               par <- list(lambda = par)
61               p.hat <- dpois(count, lambda = par$lambda)
62           },
63
64           "binomial" = {
65               size <- par$size
66               if(is.null(size)) {
67                   size <- max(count)
68                   warning("size was not given, taken as maximum count")
69               }
70
71               if(size > max(count)) {
72                   nfreq <- rep(0, size + 1)
73                   nfreq[count + 1] <- freq
74                   freq <- nfreq
75                   count <- 0:size
76                   n <- length(count)
77               }
78
79               if(!is.null(par$prob)) {
80                   if(!is.list(par))
81                       stop("`par' must be a named list and specify `prob'")
82                   par <- par$prob
83                   method <- "fixed"
84               }
85               else if(method == "ML") {
86                   df <- df - 1
87                   par <- weighted.mean(count/size, freq)
88               }
89               else if(method == "MinChisq") {
90                   df <- df - 1
91
92                   chi2 <- function(x)
93                   {
94                   p.hat <- diff(c(0, pbinom(count[-n], prob = x, size = size), 1))
95                   expected <- sum(freq) * p.hat
96                   sum((freq - expected)^2/expected)
97                   }
98
99                   par <- optimize(chi2, c(0,1))$minimum
100               }
101               par <- list(prob = par, size = size)
102               p.hat <- dbinom(count, prob = par$prob, size = par$size)
103           },
104
105
106           "nbinomial" = {
107
108               if(!is.null(par)) {
109                   if(!is.list(par)) stop("`par' must be a named list")
110                   if(!(isTRUE(all.equal(names(par), "size")) | isTRUE(all.equal(sort(names(par)), c("prob", "size")))))
111                       stop("`par' must specify `size' and possibly `prob'")
112                   if(!is.null(par$prob)) method <- "fixed"
113               }
114
115               switch(method,
116
117                      "ML" = {
118                          if(is.null(par$size)) {
119                              df <- df - 2
120                              par <- fitdistr(rep(count, freq), "negative binomial")$estimate
121                              par <- par[1]/c(1, sum(par))
122                          } else {
123                                df <- df - 1
124                                method <- c("ML", "with size fixed")
125
126                                size <- par$size
127                                xbar <- weighted.mean(count,freq)
128                                par <- c(size, size/(xbar+size))
129                            }
130                      },
131
132                      "MinChisq" = {
133                          if(is.null(par$size)) {
134                              df <- df - 2
135
136                              ## MM
137                              xbar <- weighted.mean(count,freq)
138                              s2 <- var(rep(count,freq))
139                              p <- xbar / s2
140                              size <- xbar^2/(s2 - xbar)
141                              par1 <- c(size, p)
142
143                              ## minChisq
144                              chi2 <- function(x)
145                              {
146                              p.hat <- diff(c(0, pnbinom(count[-n], size = x[1], prob = x[2]), 1))
147                              expected <- sum(freq) * p.hat
148                              sum((freq - expected)^2/expected)
149                              }
150
151                              par <- optim(par1, chi2)$par
152                          } else {
153                                df <- df - 1
154                                method <- c("MinChisq", "with size fixed")
155
156                                chi2 <- function(x)
157                                {
158                                p.hat <- diff(c(0, pnbinom(count[-n], size = par$size, prob = x), 1))
159                                expected <- sum(freq) * p.hat
160                                sum((freq - expected)^2/expected)
161}
162                                par <- c(par$size, optimize(chi2, c(0, 1))$minimum)
163                            }
164                      },
165
166                      "fixed" = {
167                          par <- c(par$size, par$prob)
168                      })
169
170               par <- list(size = par[1], prob = par[2])
171               p.hat <- dnbinom(count, size = par$size, prob = par$prob)
172           })
173
174    expected <- sum(freq) * p.hat
175
176    df <- switch(method[1],
177                 "MinChisq" = { length(freq) + df },
178                 "ML" = { sum(freq > 0) + df },
179                 "fixed" = { c(length(freq), sum(freq > 0)) + df }
180                 )
181
182    structure(list(observed = freq,
183                   count = count,
184                   fitted = expected,
185                   type = type,
186                   method = method,
187                   df = df,
188                   par = par),
189              class = "goodfit")
190}
191
192# does this need a residuals_type arg?
193print.goodfit <- function(x, residuals_type = c("pearson", "deviance", "raw"), ...)
194{
195    residuals_type <- match.arg(residuals_type)
196    cat(paste("\nObserved and fitted values for", x$type, "distribution\n"))
197    if(x$method[1] == "fixed")
198      cat("with fixed parameters \n\n")
199    else
200      cat(paste("with parameters estimated by `", paste(x$method, collapse = " "), "' \n\n", sep = ""))
201
202    resids <- residuals(x, type = residuals_type)
203    RVAL <- cbind(x$count, x$observed, x$fitted, resids)
204    colnames(RVAL) <- c("count", "observed", "fitted",
205                        paste(residuals_type, "residual"))
206    rownames(RVAL) <- rep("", nrow(RVAL))
207    print(RVAL, ...)
208    invisible(x)
209}
210
211summary.goodfit <- function(object, ...)
212{
213    df <- object$df
214    obsrvd <- object$observed
215    count <- object$count
216    expctd <- fitted(object)
217
218    G2 <- sum(ifelse(obsrvd == 0, 0, obsrvd * log(obsrvd/expctd))) * 2
219
220    n <- length(obsrvd)
221    pfun <- switch(object$type,
222                   poisson = "ppois",
223                   binomial = "pbinom",
224                   nbinomial = "pnbinom")
225    p.hat <- diff(c(0, do.call(pfun, c(list(q = count[-n]), object$par)), 1))
226    expctd <- p.hat * sum(obsrvd)
227    X2 <- sum((obsrvd - expctd)^2 / expctd)
228
229    names(G2) <- "Likelihood Ratio"
230    names(X2) <- "Pearson"
231    if(any(expctd < 5) & object$method[1] != "ML")
232        warning("Chi-squared approximation may be incorrect")
233
234    RVAL <- switch(object$method[1],
235                   ML = G2,
236                   MinChisq = X2,
237                   fixed = c(X2, G2)
238                   )
239
240    RVAL <- cbind(RVAL, df, pchisq(RVAL, df = df, lower.tail = FALSE))
241    colnames(RVAL) <- c("X^2", "df", "P(> X^2)")
242
243    cat(paste("\n\t Goodness-of-fit test for", object$type, "distribution\n\n"))
244    print(RVAL, ...)
245    invisible(RVAL)
246}
247
248plot.goodfit <- function(x, ...)
249{
250    rootogram(x, ...)
251}
252
253fitted.goodfit <- function(object, ...)
254{
255    object$fitted
256}
257
258residuals.goodfit <- function(object, type = c("pearson", "deviance", "raw"),  ...)
259{
260    obsrvd <- object$observed
261    expctd <- fitted(object)
262    count <- object$count
263    n <- length(obsrvd)
264    pfun <- switch(object$type,
265                   poisson = "ppois",
266                   binomial = "pbinom",
267                   nbinomial = "pnbinom")
268    p.hat <- diff(c(0, do.call(pfun, c(list(q = count[-n]), object$par)), 1))
269    expctd <- p.hat * sum(obsrvd)
270
271    res <- switch(match.arg(type),
272                  pearson = (obsrvd - expctd) / sqrt(expctd),
273                  deviance = ifelse(obsrvd == 0, 0,
274                                    obsrvd * log(obsrvd / expctd)),
275                  obsrvd - expctd)
276
277    return(res)
278}
279
280predict.goodfit <- function(object, newcount = NULL, type = c("response", "prob"), ...)
281{
282    if(is.null(newcount)) newcount <- object$count
283    type <- match.arg(type)
284
285    densfun <- switch(object$type,
286                      poisson = "dpois",
287                      binomial = "dbinom",
288                      nbinomial = "dnbinom")
289
290    RVAL <- do.call(densfun, c(list(x = newcount), object$par))
291    if (type == "response") RVAL <- RVAL * sum(object$observed)
292    return(RVAL)
293}
294
295