1################################ 2## HDP-HMM Negative Binomial Model 3## 4## Matthew Blackwell 5/22/2017 5################################ 6 7#' Markov Chain Monte Carlo for HDP-HSMM with a Negative 8#' Binomial outcome distribution 9#' 10#' This function generates a sample from the posterior distribution of 11#' a Hidden Semi-Markov Model with a Heirarchical Dirichlet Process 12#' and a Negative Binomial outcome distribution 13#' (Johnson and Willsky, 2013). The user supplies data and priors, and a 14#' sample from the posterior distribution is returned as an mcmc 15#' object, which can be subsequently analyzed with functions provided 16#' in the coda package. 17#' 18#' \code{HDPHSMMnegbin} simulates from the posterior distribution of a 19#' HDP-HSMM with a Negative Binomial outcome distribution, 20#' allowing for multiple, arbitrary changepoints in the model. The details of the 21#' model are discussed in Johnson & Willsky (2013). The implementation here is 22#' based on a weak-limit approximation, where there is a large, though 23#' finite number of regimes that can be switched between. Unlike other 24#' changepoint models in \code{MCMCpack}, the HDP-HSMM approach allows 25#' for the state sequence to return to previous visited states. 26#' 27#' The model takes the following form, where we show the fixed-limit version: 28#' 29#' \deqn{y_t \sim \mathcal{P}oisson(\nu_t\mu_t)} 30#' 31#' \deqn{\mu_t = x_t ' \beta_k,\;\; k = 1, \ldots, K} 32#' 33#' \deqn{\nu_t \sim \mathcal{G}amma(\rho_k, \rho_k)} 34#' 35#' Where \eqn{K} is an upper bound on the number of states and 36#' \eqn{\beta_k} and \eqn{\rho_k} are parameters when a state is 37#' \eqn{k} at \eqn{t}. 38#' 39#' In the HDP-HSMM, there is a super-state sequence that, for a given 40#' observation, is drawn from the transition distribution and then a 41#' duration is drawn from a duration distribution to determin how long 42#' that state will stay active. After that duration, a new super-state 43#' is drawn from the transition distribution, where self-transitions 44#' are disallowed. The transition probabilities between states are 45#' assumed to follow a heirarchical Dirichlet process: 46#' 47#' \deqn{\pi_k \sim \mathcal{D}irichlet(\alpha\delta_1, \ldots , 48#' \alpha\delta_K)} 49#' 50#' \deqn{\delta \sim \mathcal{D}irichlet(\gamma/K, \ldots, \gamma/K)} 51#' 52#' In the algorithm itself, these \eqn{\pi} vectors are modified to 53#' remove self-transitions as discussed above. There is a unique 54#' duration distribution for each regime with the following 55#' parameters: 56#' 57#' \deqn{D_k \sim \mathcal{N}egBin(r, \omega_k)} 58#' 59#' \deqn{\omega_k \sim \mathcal{B}eta(a_{\omega,k}, b_{\omega, k})} 60#' 61#' We assume Gaussian distribution for prior of \eqn{\beta}: 62#' 63#' \deqn{\beta_k \sim \mathcal{N}(b_0,B_0^{-1}),\;\; m = 1, \ldots, K} 64#' 65#' The overdispersion parameters have a prior with the following form: 66#' 67#' \deqn{f(\rho_k|e,f,g) \propto \rho^{e-1}(\rho + g)^{-(e+f)}} 68#' 69#' The model is simulated via blocked Gibbs conditonal on the states. 70#' The \eqn{\beta} being simulated via the auxiliary mixture sampling 71#' method of Fuerhwirth-Schanetter et al. (2009). The \eqn{\rho} is 72#' updated via slice sampling. The \eqn{\nu_t} are updated their 73#' (conjugate) full conditional, which is also Gamma. The states and 74#' their durations are drawn as in Johnson & Willsky (2013). 75#' 76#' @param formula Model formula. 77#' 78#' @param data Data frame. 79#' 80#' @param K The number of regimes under consideration. This should be 81#' larger than the hypothesized number of regimes in the data. Note 82#' that the sampler will likely visit fewer than \code{K} regimes. 83#' 84#' @param burnin The number of burn-in iterations for the sampler. 85#' 86#' @param mcmc The number of Metropolis iterations for the sampler. 87#' 88#' @param thin The thinning interval used in the simulation. The number of 89#' mcmc iterations must be divisible by this value. 90#' 91#' @param verbose A switch which determines whether or not the progress of the 92#' sampler is printed to the screen. If \code{verbose} is greater than 0 the 93#' iteration number, the current beta vector, and the Metropolis acceptance 94#' rate are printed to the screen every \code{verbose}th iteration. 95#' 96#' @param seed The seed for the random number generator. If NA, the Mersenne 97#' Twister generator is used with default seed 12345; if an integer is passed 98#' it is used to seed the Mersenne twister. The user can also pass a list of 99#' length two to use the L'Ecuyer random number generator, which is suitable 100#' for parallel computation. The first element of the list is the L'Ecuyer 101#' seed, which is a vector of length six or NA (if NA a default seed of 102#' \code{rep(12345,6)} is used). The second element of list is a positive 103#' substream number. See the MCMCpack specification for more details. 104#' 105#' 106#' @param b0 The prior mean of \eqn{\beta}. This can either be a scalar 107#' or a column vector with dimension equal to the number of betas. If this 108#' takes a scalar value, then that value will serve as the prior mean for all 109#' of the betas. 110#' 111#' @param B0 The prior precision of \eqn{\beta}. This can either be a 112#' scalar or a square matrix with dimensions equal to the number of betas. If 113#' this takes a scalar value, then that value times an identity matrix serves 114#' as the prior precision of beta. Default value of 0 is equivalent to an 115#' improper uniform prior for beta. 116#' 117#' @param e The hyperprior for the distribution \eqn{\rho} See details. 118#' 119#' @param f The hyperprior for the distribution \eqn{\rho}. See details. 120#' 121#' @param g The hyperprior for the distribution \eqn{\rho}. See details. 122#' 123#' @param r Parameter of the Negative Binomial prior for regime 124#' durations. It is the target number of successful trials. Must be 125#' strictly positive. Higher values increase the variance of the 126#' duration distributions. 127#' 128#' @param a.omega,b.omega Paramaters for the Beta prior on 129#' \eqn{\omega}, which determines the regime length distribution, 130#' which is Negative Binomial, with parameters \code{r} and \code{omega}. 131 132#' @param beta.start The starting value for the \eqn{\beta} vector. This 133#' can either be a scalar or a column vector with dimension equal to the number 134#' of betas. If this takes a scalar value, then that value will serve as the 135#' starting value for all of the betas. The default value of NA will use the 136#' maximum likelihood estimate of \eqn{\beta} as the starting value 137#' for all regimes. 138#' 139#' @param rho.start The starting value for the \eqn{\rho} variable. 140#' This can either be a scalar or a column vector with dimension 141#' equal to the number of regimes. If the value is scalar, it will 142#' be used for all regimes. The default value is a vector of ones. 143#' 144#' @param nu.start The starting values for the random effect, 145#' \eqn{\nu}. The default value is a vector of ones. 146#' 147#' @param rho.step Tuning parameter for the slice sampling approach to 148#' sampling \eqn{rho}. Determines the size of the step-out used to 149#' find the correct slice to draw from. Lower values are more 150#' accurate, but will take longer (up to a fixed searching limit). 151#' Default is 0.1. 152#' 153#' 154#' @param a.alpha,b.alpha Shape and scale parameters for the Gamma 155#' distribution on \eqn{\alpha}. 156#' 157#' @param a.gamma,b.gamma Shape and scale parameters for the Gamma 158#' distribution on \eqn{\gamma}. 159#' 160#' @param alpha.start,gamma.start Scalar starting values for the 161#' \eqn{\alpha}, and \eqn{\gamma} parameters. 162#' 163#' @param omega.start A vector of starting values for the probability 164#' of success parameter in the Negative Binomial distribution that 165#' governs the duration distributions. 166#' 167#' @param P.start Initial transition matrix between regimes. Should be 168#' a \code{K} by \code{K} matrix. If not provided, the default value 169#' will be uniform transition distributions. 170#' 171#' @param ... further arguments to be passed. 172#' 173#' @return An mcmc object that contains the posterior sample. This object can 174#' be summarized by functions provided by the coda package. 175#' 176#' @export 177#' 178#' @seealso \code{\link{MCMCnegbinChange}}, 179#' \code{\link{HDPHMMnegbin}}, 180#' 181#' @references Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011. 182#' ``MCMCpack: Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical 183#' Software}. 42(9): 1-21. \doi{10.18637/jss.v042.i09}. 184#' 185#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin. 2007. \emph{Scythe 186#' Statistical Library 1.0.} \url{http://scythe.lsa.umich.edu}. 187#' 188#' Sylvia Fruehwirth-Schnatter, Rudolf Fruehwirth, Leonhard Held, and 189#' Havard Rue. 2009. ``Improved auxiliary mixture sampling for 190#' hierarchical models of non-Gaussian data'', \emph{Statistics 191#' and Computing} 19(4): 479-492. 192#' <doi:10.1007/s11222-008-9109-4> 193#' 194#' Matthew Blackwell. 2017. ``Game Changers: Detecting Shifts in 195#' Overdispersed Count Data,'' \emph{Political Analysis} 196#' 26(2), 230-239. <doi:10.1017/pan.2017.42> 197#' 198#' Matthew J. Johnson and Alan S. Willsky. 2013. ``Bayesian Nonparametric Hidden Semi-Markov Models.'' \emph{Journal of Machine Learning Research}, 14(Feb), 673-701. 199#' 200#' @keywords models 201#' 202#' @examples 203#' 204#' \dontrun{ 205#' n <- 150 206#' reg <- 3 207#' true.s <- gl(reg, n/reg, n) 208#' rho.true <- c(1.5, 0.5, 3) 209#' b1.true <- c(1, -2, 2) 210#' x1 <- runif(n, 0, 2) 211#' nu.true <- rgamma(n, rho.true[true.s], rho.true[true.s]) 212#' mu <- nu.true * exp(1 + x1 * b1.true[true.s]) 213#' y <- rpois(n, mu) 214#' 215#' posterior <- HDPHSMMnegbin(y ~ x1, K = 10, verbose = 1000, 216#' e = 2, f = 2, g = 10, 217#' b0 = 0, B0 = 1/9, 218#' a.omega = 1, b.omega = 100, r = 1, 219#' rho.step = rep(0.75, times = 10), 220#' seed = list(NA, 2), 221#' omega.start = 0.05, gamma.start = 10, 222#' alpha.start = 5) 223#' 224#' plotHDPChangepoint(posterior, ylab="Density", start=1) 225#' } 226#' 227 228 229 230"HDPHSMMnegbin"<- 231 function(formula, data = parent.frame(), K = 10, 232 b0 = 0, B0 = 1, 233 a.alpha = 1, b.alpha = 0.1, a.gamma = 1, b.gamma = 0.1, 234 a.omega, b.omega, e = 2, f = 2, g = 10, r = 1, 235 burnin = 1000, mcmc = 1000, thin = 1, verbose = 0, 236 seed = NA, beta.start = NA, P.start = NA, 237 rho.start = NA, rho.step, nu.start = NA, 238 omega.start = NA, gamma.start = 0.5, alpha.start = 100, ...) { 239 240 ## form response and model matrices 241 holder <- parse.formula(formula, data) 242 y <- holder[[1]] 243 X <- holder[[2]] 244 xnames <- holder[[3]] 245 J <- ncol(X) 246 n <- length(y) 247 n.arrival<- y + 1 248 NT <- max(n.arrival) 249 tot.comp <- n + sum(y) 250 251 ## check iteration parameters 252 check.mcmc.parameters(burnin, mcmc, thin) 253 totiter <- mcmc + burnin 254 cl <- match.call() 255 nstore <- mcmc/thin 256 257 ## seeds 258 seeds <- form.seeds(seed) 259 lecuyer <- seeds[[1]] 260 seed.array <- seeds[[2]] 261 lecuyer.stream <- seeds[[3]] 262 if(!any(is.na(seed)) & (length(seed) == 1)) set.seed(seed) 263 264 265 mvn.prior <- form.mvn.prior(b0, B0, J) 266 b0 <- mvn.prior[[1]] 267 B0 <- mvn.prior[[2]] 268 269 if (K == 1) { 270 stop("HDP-HSMM only works with K > 1") 271 } 272 273 ## get initial values of tau from observed y 274 Pstart <- check.hdp.P(P.start, K, n, 1/K) 275 276 betastart <- beta.negbin.start(beta.start, K, J, formula, data) 277 if (is.na(rho.start[1])) { 278 rho.start <- rep(1, times = K) 279 } 280 if (!(length(rho.step) %in% c(1, K))) { 281 stop("rho.step has the wrong dimensions") 282 } 283 if (length(rho.step) == 1) { 284 rho.step <- rep(rho.step, times = K) 285 } 286 if (is.na(nu.start[1])) { 287 nu.start <- rep(1, times = n) 288 } 289 if (!(length(omega.start) %in% c(1, K))) { 290 stop("omega.start has the wrong dimensions") 291 } 292 if (is.na(omega.start[1])) { 293 omega.start <- rbeta(K, a.omega, b.omega) 294 } else { 295 if (length(omega.start) == 1) { 296 omega.start <- rep(omega.start, times = K) 297 } 298 } 299 ## new taus 300 taustart <- tau.negbin.initial(y) 301 component1start <- round(runif(tot.comp, 1, 10)) 302 303 ## call C++ code to draw sample 304 posterior <- .C("cHDPHSMMnegbin", 305 betaout = as.double(rep(0.0, nstore * K * J)), 306 Pout = as.double(rep(0.0, nstore * K * K)), 307 omegaout = as.double(rep(0.0, nstore * K)), 308 sout = as.double(rep(0.0, nstore * n)), 309 nuout = as.double(rep(0.0, nstore * n)), 310 rhoout = as.double(rep(0.0, nstore * K)), 311 tau1out = as.double(rep(0.0, nstore * n)), 312 tau2out = as.double(rep(0.0, nstore * n)), 313 comp1out = as.integer(rep(0, nstore * n)), 314 comp2out = as.integer(rep(0, nstore * n)), 315 sr1out = as.double(rep(0, nstore * n)), 316 sr2out = as.double(rep(0, nstore * n)), 317 mr1out = as.double(rep(0, nstore*n)), 318 mr2out = as.double(rep(0, nstore*n)), 319 gammaout = as.double(rep(0, nstore)), 320 alphaout = as.double(rep(0, nstore)), 321 rhosizes = as.double(rep(0.0, K)), 322 Ydata = as.double(y), 323 Yrow = as.integer(nrow(y)), 324 Ycol = as.integer(ncol(y)), 325 Xdata = as.double(X), 326 Xrow = as.integer(nrow(X)), 327 Xcol = as.integer(ncol(X)), 328 K = as.integer(K), 329 burnin = as.integer(burnin), 330 mcmc = as.integer(mcmc), 331 thin = as.integer(thin), 332 verbose = as.integer(verbose), 333 betastart = as.double(betastart), 334 Pstart = as.double(Pstart), 335 nustart = as.double(nu.start), 336 rhostart = as.double(rho.start), 337 tau1start = as.double(taustart$tau1), 338 tau2start = as.double(taustart$tau2), 339 component1start = as.double(component1start), 340 alphastart = as.double(alpha.start), 341 gammastart = as.double(gamma.start), 342 omegastart = as.double(omega.start), 343 a_alpha = as.double(a.alpha), 344 b_alpha = as.double(b.alpha), 345 a_gamma = as.double(a.gamma), 346 b_gamma = as.double(b.gamma), 347 a_omega = as.double(a.omega), 348 b_omega = as.double(b.omega), 349 e = as.double(e), 350 f = as.double(f), 351 g = as.double(g), 352 r = as.double(r), 353 rhostepdata = as.double(rho.step), 354 lecuyer = as.integer(lecuyer), 355 seedarray = as.integer(seed.array), 356 lecuyerstream = as.integer(lecuyer.stream), 357 b0data = as.double(b0), 358 B0data = as.double(B0), 359 PACKAGE="MCMCpack") 360 361 362 363 ## pull together matrix and build MCMC object to return 364 beta.holder <- matrix(posterior$betaout, nstore, ) 365 P.holder <- matrix(posterior$Pout, nstore, ) 366 s.holder <- matrix(posterior$sout, nstore, ) 367 nu.holder <- matrix(posterior$nuout, nstore, ) 368 rho.holder <- matrix(posterior$rhoout, nstore, ) 369 omega.holder <- matrix(posterior$omegaout, nstore, ) 370 tau1.holder <- matrix(posterior$tau1out, nstore, ) 371 tau2.holder <- matrix(posterior$tau2out, nstore, ) 372 comp1.holder <- matrix(posterior$comp1out, nstore, ) 373 comp2.holder <- matrix(posterior$comp2out, nstore, ) 374 sr1.holder <- matrix(posterior$sr1out, nstore, ) 375 sr2.holder <- matrix(posterior$sr2out, nstore, ) 376 mr1.holder <- matrix(posterior$mr1out, nstore, ) 377 mr2.holder <- matrix(posterior$mr2out, nstore, ) 378 379 nsts <- apply(s.holder, 1, function(x) length(unique(x))) 380 mode.mod <- as.numeric(names(which.max(table(nsts)))) 381 pr.chpt <- rowMeans(rbind(0, diff(t(s.holder)) != 0)) 382 output <- mcmc(data=beta.holder, start=burnin+1, end=burnin + mcmc, thin=thin) 383 varnames(output) <- sapply(c(1:K), function(i) { paste(xnames, 384 "_regime", i, sep = "")}) 385 attr(output, "title") <- "HDPHSMMnegbin Posterior Sample" 386 attr(output, "formula") <- formula 387 attr(output, "y") <- y 388 attr(output, "X") <- X 389 attr(output, "K") <- K 390 attr(output, "call") <- cl 391 ## attr(output, "prob.state") <- ps.holder/nstore 392 attr(output, "pr.chpt") <- pr.chpt 393 attr(output, "s.store") <- s.holder 394 attr(output, "P.store") <- P.holder 395 attr(output, "rho.store") <- rho.holder 396 attr(output, "nu.store") <- nu.holder 397 ## attr(output, "tau1.store") <- tau1.holder 398 ## attr(output, "tau2.store") <- tau2.holder 399 ## attr(output, "comp1.store") <- comp1.holder 400 ## attr(output, "comp2.store") <- comp2.holder 401 ## attr(output, "sr1.store") <- sr1.holder 402 ## attr(output, "sr2.store") <- sr2.holder 403 ## attr(output, "mr1.store") <- mr1.holder 404 ## attr(output, "mr2.store") <- mr2.holder 405 attr(output, "rho.step") <- posterior$rhosizes 406 attr(output, "num.regimes") <- nsts 407 attr(output, "gamma.store") <- posterior$gammaout 408 attr(output, "omega.store") <- omega.holder 409 attr(output, "alpha.store") <- posterior$alphaout 410 411 return(output) 412 } 413 414 415## initial values of tau in MCMCnegbinChange 416"tau.negbin.initial" <- function(y) { 417 tau1 <- rep(NA, length(y)) 418 tau2 <- rep(NA, length(y)) 419 lambda.t <- exp(5) 420 for (t in 1:length(y)){ 421 nt <- y[t] 422 if (nt==0) { 423 tau1[t] <- 1 + rexp(1, lambda.t) 424 tau2[t] <- 0 425 } 426 else{ 427 xi <- rexp(1, lambda.t) 428 tau2[t] <- rbeta(1, nt, 1) 429 tau1[t] <- 1 - tau2[t] + xi 430 } 431 } 432 return(list(tau1 = tau1, tau2 = tau2)) 433} 434