1goodfit <- function(x, type = c("poisson", "binomial", "nbinomial"), 2 method = c("ML", "MinChisq"), par = NULL) 3{ 4 if(is.vector(x)) { 5 x <- table(x) 6 } 7 if(is.table(x)) { 8 if(length(dim(x)) > 1) stop ("x must be a 1-way table") 9 freq <- as.vector(x) 10 count <- as.numeric(names(x)) 11 } else { 12 if(!(!is.null(ncol(x)) && ncol(x) == 2)) 13 stop("x must be a 2-column matrix or data.frame") 14 freq <- as.vector(x[,1]) 15 count <- as.vector(x[,2]) 16 } 17 18 ## fill-in possibly missing cells 19 nfreq <- rep(0, max(count) + 1) 20 nfreq[count + 1] <- freq 21 freq <- nfreq 22 count <- 0:max(count) 23 n <- length(count) 24 25 ## starting value for degrees of freedom 26 df <- -1 27 28 type <- match.arg(type) 29 method <- match.arg(method) 30 31 switch(type, 32 33 "poisson" = { 34 35 if(!is.null(par)) { 36 if(!is.list(par)) 37 stop("`par' must be a named list") 38 if(names(par) != "lambda") 39 stop("`par' must specify `lambda'") 40 41 par <- par$lambda 42 method <- "fixed" 43 } 44 else if(method == "ML") { 45 df <- df - 1 46 par <- weighted.mean(count,freq) 47 } 48 else if(method == "MinChisq") { 49 df <- df - 1 50 51 chi2 <- function(x) 52 { 53 p.hat <- diff(c(0, ppois(count[-n], lambda = x), 1)) 54 expected <- sum(freq) * p.hat 55 sum((freq - expected)^2/expected) 56 } 57 58 par <- optimize(chi2, range(count))$minimum 59 } 60 par <- list(lambda = par) 61 p.hat <- dpois(count, lambda = par$lambda) 62 }, 63 64 "binomial" = { 65 size <- par$size 66 if(is.null(size)) { 67 size <- max(count) 68 warning("size was not given, taken as maximum count") 69 } 70 71 if(size > max(count)) { 72 nfreq <- rep(0, size + 1) 73 nfreq[count + 1] <- freq 74 freq <- nfreq 75 count <- 0:size 76 n <- length(count) 77 } 78 79 if(!is.null(par$prob)) { 80 if(!is.list(par)) 81 stop("`par' must be a named list and specify `prob'") 82 par <- par$prob 83 method <- "fixed" 84 } 85 else if(method == "ML") { 86 df <- df - 1 87 par <- weighted.mean(count/size, freq) 88 } 89 else if(method == "MinChisq") { 90 df <- df - 1 91 92 chi2 <- function(x) 93 { 94 p.hat <- diff(c(0, pbinom(count[-n], prob = x, size = size), 1)) 95 expected <- sum(freq) * p.hat 96 sum((freq - expected)^2/expected) 97 } 98 99 par <- optimize(chi2, c(0,1))$minimum 100 } 101 par <- list(prob = par, size = size) 102 p.hat <- dbinom(count, prob = par$prob, size = par$size) 103 }, 104 105 106 "nbinomial" = { 107 108 if(!is.null(par)) { 109 if(!is.list(par)) stop("`par' must be a named list") 110 if(!(isTRUE(all.equal(names(par), "size")) | isTRUE(all.equal(sort(names(par)), c("prob", "size"))))) 111 stop("`par' must specify `size' and possibly `prob'") 112 if(!is.null(par$prob)) method <- "fixed" 113 } 114 115 switch(method, 116 117 "ML" = { 118 if(is.null(par$size)) { 119 df <- df - 2 120 par <- fitdistr(rep(count, freq), "negative binomial")$estimate 121 par <- par[1]/c(1, sum(par)) 122 } else { 123 df <- df - 1 124 method <- c("ML", "with size fixed") 125 126 size <- par$size 127 xbar <- weighted.mean(count,freq) 128 par <- c(size, size/(xbar+size)) 129 } 130 }, 131 132 "MinChisq" = { 133 if(is.null(par$size)) { 134 df <- df - 2 135 136 ## MM 137 xbar <- weighted.mean(count,freq) 138 s2 <- var(rep(count,freq)) 139 p <- xbar / s2 140 size <- xbar^2/(s2 - xbar) 141 par1 <- c(size, p) 142 143 ## minChisq 144 chi2 <- function(x) 145 { 146 p.hat <- diff(c(0, pnbinom(count[-n], size = x[1], prob = x[2]), 1)) 147 expected <- sum(freq) * p.hat 148 sum((freq - expected)^2/expected) 149 } 150 151 par <- optim(par1, chi2)$par 152 } else { 153 df <- df - 1 154 method <- c("MinChisq", "with size fixed") 155 156 chi2 <- function(x) 157 { 158 p.hat <- diff(c(0, pnbinom(count[-n], size = par$size, prob = x), 1)) 159 expected <- sum(freq) * p.hat 160 sum((freq - expected)^2/expected) 161} 162 par <- c(par$size, optimize(chi2, c(0, 1))$minimum) 163 } 164 }, 165 166 "fixed" = { 167 par <- c(par$size, par$prob) 168 }) 169 170 par <- list(size = par[1], prob = par[2]) 171 p.hat <- dnbinom(count, size = par$size, prob = par$prob) 172 }) 173 174 expected <- sum(freq) * p.hat 175 176 df <- switch(method[1], 177 "MinChisq" = { length(freq) + df }, 178 "ML" = { sum(freq > 0) + df }, 179 "fixed" = { c(length(freq), sum(freq > 0)) + df } 180 ) 181 182 structure(list(observed = freq, 183 count = count, 184 fitted = expected, 185 type = type, 186 method = method, 187 df = df, 188 par = par), 189 class = "goodfit") 190} 191 192# does this need a residuals_type arg? 193print.goodfit <- function(x, residuals_type = c("pearson", "deviance", "raw"), ...) 194{ 195 residuals_type <- match.arg(residuals_type) 196 cat(paste("\nObserved and fitted values for", x$type, "distribution\n")) 197 if(x$method[1] == "fixed") 198 cat("with fixed parameters \n\n") 199 else 200 cat(paste("with parameters estimated by `", paste(x$method, collapse = " "), "' \n\n", sep = "")) 201 202 resids <- residuals(x, type = residuals_type) 203 RVAL <- cbind(x$count, x$observed, x$fitted, resids) 204 colnames(RVAL) <- c("count", "observed", "fitted", 205 paste(residuals_type, "residual")) 206 rownames(RVAL) <- rep("", nrow(RVAL)) 207 print(RVAL, ...) 208 invisible(x) 209} 210 211summary.goodfit <- function(object, ...) 212{ 213 df <- object$df 214 obsrvd <- object$observed 215 count <- object$count 216 expctd <- fitted(object) 217 218 G2 <- sum(ifelse(obsrvd == 0, 0, obsrvd * log(obsrvd/expctd))) * 2 219 220 n <- length(obsrvd) 221 pfun <- switch(object$type, 222 poisson = "ppois", 223 binomial = "pbinom", 224 nbinomial = "pnbinom") 225 p.hat <- diff(c(0, do.call(pfun, c(list(q = count[-n]), object$par)), 1)) 226 expctd <- p.hat * sum(obsrvd) 227 X2 <- sum((obsrvd - expctd)^2 / expctd) 228 229 names(G2) <- "Likelihood Ratio" 230 names(X2) <- "Pearson" 231 if(any(expctd < 5) & object$method[1] != "ML") 232 warning("Chi-squared approximation may be incorrect") 233 234 RVAL <- switch(object$method[1], 235 ML = G2, 236 MinChisq = X2, 237 fixed = c(X2, G2) 238 ) 239 240 RVAL <- cbind(RVAL, df, pchisq(RVAL, df = df, lower.tail = FALSE)) 241 colnames(RVAL) <- c("X^2", "df", "P(> X^2)") 242 243 cat(paste("\n\t Goodness-of-fit test for", object$type, "distribution\n\n")) 244 print(RVAL, ...) 245 invisible(RVAL) 246} 247 248plot.goodfit <- function(x, ...) 249{ 250 rootogram(x, ...) 251} 252 253fitted.goodfit <- function(object, ...) 254{ 255 object$fitted 256} 257 258residuals.goodfit <- function(object, type = c("pearson", "deviance", "raw"), ...) 259{ 260 obsrvd <- object$observed 261 expctd <- fitted(object) 262 count <- object$count 263 n <- length(obsrvd) 264 pfun <- switch(object$type, 265 poisson = "ppois", 266 binomial = "pbinom", 267 nbinomial = "pnbinom") 268 p.hat <- diff(c(0, do.call(pfun, c(list(q = count[-n]), object$par)), 1)) 269 expctd <- p.hat * sum(obsrvd) 270 271 res <- switch(match.arg(type), 272 pearson = (obsrvd - expctd) / sqrt(expctd), 273 deviance = ifelse(obsrvd == 0, 0, 274 obsrvd * log(obsrvd / expctd)), 275 obsrvd - expctd) 276 277 return(res) 278} 279 280predict.goodfit <- function(object, newcount = NULL, type = c("response", "prob"), ...) 281{ 282 if(is.null(newcount)) newcount <- object$count 283 type <- match.arg(type) 284 285 densfun <- switch(object$type, 286 poisson = "dpois", 287 binomial = "dbinom", 288 nbinomial = "dnbinom") 289 290 RVAL <- do.call(densfun, c(list(x = newcount), object$par)) 291 if (type == "response") RVAL <- RVAL * sum(object$observed) 292 return(RVAL) 293} 294 295