1################################
2## HDP-HMM Negative Binomial Model
3##
4## Matthew Blackwell 5/22/2017
5################################
6
7#' Markov Chain Monte Carlo for HDP-HSMM with a Negative
8#' Binomial outcome distribution
9#'
10#' This function generates a sample from the posterior distribution of
11#' a Hidden Semi-Markov Model with a Heirarchical Dirichlet Process
12#' and a Negative Binomial outcome distribution
13#' (Johnson and Willsky, 2013).  The user supplies data and priors, and a
14#' sample from the posterior distribution is returned as an mcmc
15#' object, which can be subsequently analyzed with functions provided
16#' in the coda package.
17#'
18#' \code{HDPHSMMnegbin} simulates from the posterior distribution of a
19#' HDP-HSMM with a Negative Binomial outcome distribution,
20#' allowing for multiple, arbitrary changepoints in the model. The details of the
21#' model are discussed in Johnson & Willsky (2013). The implementation here is
22#' based on a weak-limit approximation, where there is a large, though
23#' finite number of regimes that can be switched between. Unlike other
24#' changepoint models in \code{MCMCpack}, the HDP-HSMM approach allows
25#' for the state sequence to return to previous visited states.
26#'
27#'  The model takes the following form, where we show the fixed-limit version:
28#'
29#' \deqn{y_t \sim \mathcal{P}oisson(\nu_t\mu_t)}
30#'
31#' \deqn{\mu_t = x_t ' \beta_k,\;\; k = 1, \ldots, K}
32#'
33#' \deqn{\nu_t \sim \mathcal{G}amma(\rho_k, \rho_k)}
34#'
35#' Where \eqn{K} is an upper bound on the number of states and
36#' \eqn{\beta_k} and \eqn{\rho_k} are parameters when a state is
37#' \eqn{k} at \eqn{t}.
38#'
39#' In the HDP-HSMM, there is a super-state sequence that, for a given
40#' observation, is drawn from the transition distribution and then a
41#' duration is drawn from a duration distribution to determin how long
42#' that state will stay active. After that duration, a new super-state
43#' is drawn from the transition distribution, where self-transitions
44#' are  disallowed. The transition probabilities between states are
45#' assumed to follow a heirarchical Dirichlet process:
46#'
47#' \deqn{\pi_k \sim \mathcal{D}irichlet(\alpha\delta_1, \ldots ,
48#' \alpha\delta_K)}
49#'
50#' \deqn{\delta \sim \mathcal{D}irichlet(\gamma/K, \ldots, \gamma/K)}
51#'
52#' In the algorithm itself, these \eqn{\pi} vectors are modified to
53#' remove self-transitions as discussed above. There is a unique
54#' duration distribution for each regime with the following
55#' parameters:
56#'
57#' \deqn{D_k \sim \mathcal{N}egBin(r, \omega_k)}
58#'
59#' \deqn{\omega_k \sim \mathcal{B}eta(a_{\omega,k}, b_{\omega, k})}
60#'
61#' We assume Gaussian distribution for prior of \eqn{\beta}:
62#'
63#' \deqn{\beta_k \sim \mathcal{N}(b_0,B_0^{-1}),\;\; m = 1, \ldots, K}
64#'
65#' The overdispersion parameters have a prior with the following form:
66#'
67#' \deqn{f(\rho_k|e,f,g) \propto \rho^{e-1}(\rho + g)^{-(e+f)}}
68#'
69#' The model is simulated via blocked Gibbs conditonal on the states.
70#' The \eqn{\beta} being simulated via the auxiliary mixture sampling
71#' method of Fuerhwirth-Schanetter et al. (2009). The \eqn{\rho} is
72#' updated via slice sampling. The \eqn{\nu_t} are updated their
73#' (conjugate) full conditional, which is also Gamma. The states and
74#' their durations are drawn as in Johnson & Willsky (2013).
75#'
76#' @param formula Model formula.
77#'
78#' @param data Data frame.
79#'
80#' @param K The number of regimes under consideration. This should be
81#'   larger than the hypothesized number of regimes in the data. Note
82#'   that the sampler will likely visit fewer than \code{K} regimes.
83#'
84#' @param burnin The number of burn-in iterations for the sampler.
85#'
86#' @param mcmc The number of Metropolis iterations for the sampler.
87#'
88#' @param thin The thinning interval used in the simulation.  The number of
89#' mcmc iterations must be divisible by this value.
90#'
91#' @param verbose A switch which determines whether or not the progress of the
92#' sampler is printed to the screen.  If \code{verbose} is greater than 0 the
93#' iteration number, the current beta vector, and the Metropolis acceptance
94#' rate are printed to the screen every \code{verbose}th iteration.
95#'
96#' @param seed The seed for the random number generator.  If NA, the Mersenne
97#' Twister generator is used with default seed 12345; if an integer is passed
98#' it is used to seed the Mersenne twister.  The user can also pass a list of
99#' length two to use the L'Ecuyer random number generator, which is suitable
100#' for parallel computation.  The first element of the list is the L'Ecuyer
101#' seed, which is a vector of length six or NA (if NA a default seed of
102#' \code{rep(12345,6)} is used).  The second element of list is a positive
103#' substream number. See the MCMCpack specification for more details.
104#'
105#'
106#' @param b0 The prior mean of \eqn{\beta}.  This can either be a scalar
107#' or a column vector with dimension equal to the number of betas. If this
108#' takes a scalar value, then that value will serve as the prior mean for all
109#' of the betas.
110#'
111#' @param B0 The prior precision of \eqn{\beta}.  This can either be a
112#' scalar or a square matrix with dimensions equal to the number of betas.  If
113#' this takes a scalar value, then that value times an identity matrix serves
114#' as the prior precision of beta. Default value of 0 is equivalent to an
115#' improper uniform prior for beta.
116#'
117#' @param e The hyperprior for the distribution \eqn{\rho} See details.
118#'
119#' @param f The hyperprior for the distribution \eqn{\rho}. See details.
120#'
121#' @param g The hyperprior for the distribution \eqn{\rho}. See details.
122#'
123#' @param r Parameter of the Negative Binomial prior for regime
124#'   durations. It is the target number of successful trials. Must be
125#'   strictly positive. Higher values increase the variance of the
126#'   duration distributions.
127#'
128#' @param a.omega,b.omega Paramaters for the Beta prior on
129#'   \eqn{\omega}, which determines the regime length distribution,
130#'   which is Negative Binomial, with parameters \code{r} and \code{omega}.
131
132#' @param beta.start The starting value for the \eqn{\beta} vector.  This
133#' can either be a scalar or a column vector with dimension equal to the number
134#' of betas. If this takes a scalar value, then that value will serve as the
135#' starting value for all of the betas.  The default value of NA will use the
136#' maximum likelihood estimate of \eqn{\beta} as the starting value
137#'   for all regimes.
138#'
139#' @param rho.start The starting value for the \eqn{\rho} variable.
140#'   This can either be a scalar or a column vector with dimension
141#'   equal to the number of regimes. If the value is scalar, it will
142#'   be used for all regimes. The default value is a vector of ones.
143#'
144#' @param nu.start The starting values for the random effect,
145#'     \eqn{\nu}. The default value is a vector of ones.
146#'
147#' @param rho.step Tuning parameter for the slice sampling approach to
148#'     sampling \eqn{rho}. Determines the size of the step-out used to
149#'     find the correct slice to draw from. Lower values are more
150#'     accurate, but will take longer (up to a fixed searching limit).
151#'     Default is 0.1.
152#'
153#'
154#' @param a.alpha,b.alpha Shape and scale parameters for the Gamma
155#'   distribution on \eqn{\alpha}.
156#'
157#' @param a.gamma,b.gamma Shape and scale parameters for the Gamma
158#'   distribution on \eqn{\gamma}.
159#'
160#' @param alpha.start,gamma.start Scalar starting values for the
161#'   \eqn{\alpha}, and \eqn{\gamma} parameters.
162#'
163#' @param omega.start A vector of starting values for the probability
164#'   of success parameter in the Negative Binomial distribution that
165#'   governs the duration distributions.
166#'
167#' @param P.start Initial transition matrix between regimes. Should be
168#'   a \code{K} by \code{K} matrix. If not provided, the default value
169#'   will be uniform transition distributions.
170#'
171#' @param ... further arguments to be passed.
172#'
173#' @return An mcmc object that contains the posterior sample.  This object can
174#' be summarized by functions provided by the coda package.
175#'
176#' @export
177#'
178#' @seealso \code{\link{MCMCnegbinChange}},
179#'   \code{\link{HDPHMMnegbin}},
180#'
181#' @references Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011.
182#' ``MCMCpack: Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical
183#' Software}. 42(9): 1-21.  \doi{10.18637/jss.v042.i09}.
184#'
185#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin.  2007.  \emph{Scythe
186#' Statistical Library 1.0.} \url{http://scythe.lsa.umich.edu}.
187#'
188#' Sylvia Fruehwirth-Schnatter, Rudolf Fruehwirth, Leonhard Held, and
189#'     Havard Rue. 2009. ``Improved auxiliary mixture sampling for
190#'     hierarchical models of non-Gaussian data'', \emph{Statistics
191#'     and Computing} 19(4): 479-492.
192#'     <doi:10.1007/s11222-008-9109-4>
193#'
194#' Matthew Blackwell. 2017. ``Game Changers: Detecting Shifts in
195#'   Overdispersed Count Data,'' \emph{Political Analysis}
196#'   26(2), 230-239. <doi:10.1017/pan.2017.42>
197#'
198#' Matthew J. Johnson and Alan S. Willsky. 2013. ``Bayesian Nonparametric Hidden Semi-Markov Models.'' \emph{Journal of Machine Learning Research}, 14(Feb), 673-701.
199#'
200#' @keywords models
201#'
202#' @examples
203#'
204#'  \dontrun{
205#'    n <- 150
206#'    reg <- 3
207#'    true.s <- gl(reg, n/reg, n)
208#'    rho.true <- c(1.5, 0.5, 3)
209#'    b1.true <- c(1, -2, 2)
210#'    x1 <- runif(n, 0, 2)
211#'    nu.true <- rgamma(n, rho.true[true.s], rho.true[true.s])
212#'    mu <- nu.true * exp(1 + x1 * b1.true[true.s])
213#'    y <- rpois(n, mu)
214#'
215#'    posterior <- HDPHSMMnegbin(y ~ x1, K = 10, verbose = 1000,
216#'                           e = 2, f = 2, g = 10,
217#'                           b0 = 0, B0 = 1/9,
218#'                           a.omega = 1, b.omega = 100, r = 1,
219#'                           rho.step = rep(0.75, times = 10),
220#'                           seed = list(NA, 2),
221#'                           omega.start = 0.05, gamma.start = 10,
222#'                           alpha.start = 5)
223#'
224#'    plotHDPChangepoint(posterior, ylab="Density", start=1)
225#'    }
226#'
227
228
229
230"HDPHSMMnegbin"<-
231  function(formula, data = parent.frame(), K = 10,
232           b0 = 0, B0 = 1,
233           a.alpha = 1, b.alpha = 0.1, a.gamma = 1, b.gamma = 0.1,
234           a.omega, b.omega, e = 2, f = 2, g = 10, r = 1,
235           burnin = 1000, mcmc = 1000, thin = 1, verbose = 0,
236           seed = NA, beta.start = NA, P.start = NA,
237           rho.start = NA, rho.step, nu.start = NA,
238           omega.start = NA, gamma.start = 0.5, alpha.start = 100, ...) {
239
240    ## form response and model matrices
241    holder <- parse.formula(formula, data)
242    y <- holder[[1]]
243    X <- holder[[2]]
244    xnames <- holder[[3]]
245    J <- ncol(X)
246    n <- length(y)
247    n.arrival<-  y + 1
248    NT      <-  max(n.arrival)
249    tot.comp <-  n + sum(y)
250
251    ## check iteration parameters
252    check.mcmc.parameters(burnin, mcmc, thin)
253    totiter <- mcmc + burnin
254    cl <- match.call()
255    nstore <- mcmc/thin
256
257    ## seeds
258    seeds <- form.seeds(seed)
259    lecuyer <- seeds[[1]]
260    seed.array <- seeds[[2]]
261    lecuyer.stream <- seeds[[3]]
262    if(!any(is.na(seed)) & (length(seed) == 1)) set.seed(seed)
263
264
265    mvn.prior <- form.mvn.prior(b0, B0, J)
266    b0 <- mvn.prior[[1]]
267    B0 <- mvn.prior[[2]]
268
269    if (K == 1) {
270      stop("HDP-HSMM only works with K > 1")
271    }
272
273    ## get initial values of tau from observed y
274    Pstart <- check.hdp.P(P.start, K, n, 1/K)
275
276    betastart  <- beta.negbin.start(beta.start, K, J, formula, data)
277    if (is.na(rho.start[1])) {
278      rho.start <- rep(1, times = K)
279    }
280    if (!(length(rho.step) %in% c(1, K))) {
281      stop("rho.step has the wrong dimensions")
282    }
283    if (length(rho.step) == 1) {
284      rho.step <- rep(rho.step, times = K)
285    }
286    if (is.na(nu.start[1])) {
287      nu.start <- rep(1, times = n)
288    }
289    if (!(length(omega.start) %in% c(1, K))) {
290      stop("omega.start has the wrong dimensions")
291    }
292    if (is.na(omega.start[1])) {
293      omega.start <- rbeta(K, a.omega, b.omega)
294    } else {
295      if (length(omega.start) == 1) {
296        omega.start <- rep(omega.start, times = K)
297      }
298    }
299    ## new taus
300    taustart <- tau.negbin.initial(y)
301    component1start  <-  round(runif(tot.comp, 1, 10))
302
303    ## call C++ code to draw sample
304    posterior <- .C("cHDPHSMMnegbin",
305                    betaout = as.double(rep(0.0, nstore * K * J)),
306                    Pout = as.double(rep(0.0, nstore * K * K)),
307                    omegaout = as.double(rep(0.0, nstore * K)),
308                    sout = as.double(rep(0.0, nstore * n)),
309                    nuout = as.double(rep(0.0, nstore * n)),
310                    rhoout = as.double(rep(0.0, nstore * K)),
311                    tau1out = as.double(rep(0.0, nstore * n)),
312                    tau2out = as.double(rep(0.0, nstore * n)),
313                    comp1out = as.integer(rep(0, nstore * n)),
314                    comp2out = as.integer(rep(0, nstore * n)),
315                    sr1out = as.double(rep(0, nstore * n)),
316                    sr2out = as.double(rep(0, nstore * n)),
317                    mr1out = as.double(rep(0, nstore*n)),
318                    mr2out = as.double(rep(0, nstore*n)),
319                    gammaout = as.double(rep(0, nstore)),
320                    alphaout = as.double(rep(0, nstore)),
321                    rhosizes = as.double(rep(0.0, K)),
322                    Ydata = as.double(y),
323                    Yrow = as.integer(nrow(y)),
324                    Ycol = as.integer(ncol(y)),
325                    Xdata = as.double(X),
326                    Xrow = as.integer(nrow(X)),
327                    Xcol = as.integer(ncol(X)),
328                    K = as.integer(K),
329                    burnin = as.integer(burnin),
330                    mcmc = as.integer(mcmc),
331                    thin = as.integer(thin),
332                    verbose = as.integer(verbose),
333                    betastart = as.double(betastart),
334                    Pstart = as.double(Pstart),
335                    nustart = as.double(nu.start),
336                    rhostart = as.double(rho.start),
337                    tau1start = as.double(taustart$tau1),
338                    tau2start = as.double(taustart$tau2),
339                    component1start = as.double(component1start),
340                    alphastart = as.double(alpha.start),
341                    gammastart = as.double(gamma.start),
342                    omegastart = as.double(omega.start),
343                    a_alpha = as.double(a.alpha),
344                    b_alpha = as.double(b.alpha),
345                    a_gamma = as.double(a.gamma),
346                    b_gamma = as.double(b.gamma),
347                    a_omega = as.double(a.omega),
348                    b_omega = as.double(b.omega),
349                    e = as.double(e),
350                    f = as.double(f),
351                    g = as.double(g),
352                    r = as.double(r),
353                    rhostepdata = as.double(rho.step),
354                    lecuyer = as.integer(lecuyer),
355                    seedarray = as.integer(seed.array),
356                    lecuyerstream = as.integer(lecuyer.stream),
357                    b0data = as.double(b0),
358                    B0data = as.double(B0),
359                    PACKAGE="MCMCpack")
360
361
362
363    ## pull together matrix and build MCMC object to return
364    beta.holder <- matrix(posterior$betaout, nstore, )
365    P.holder    <- matrix(posterior$Pout, nstore, )
366    s.holder    <- matrix(posterior$sout, nstore, )
367    nu.holder   <- matrix(posterior$nuout, nstore, )
368    rho.holder  <- matrix(posterior$rhoout, nstore, )
369    omega.holder  <- matrix(posterior$omegaout, nstore, )
370    tau1.holder <- matrix(posterior$tau1out, nstore, )
371    tau2.holder <- matrix(posterior$tau2out, nstore, )
372    comp1.holder <- matrix(posterior$comp1out, nstore, )
373    comp2.holder <- matrix(posterior$comp2out, nstore, )
374    sr1.holder <- matrix(posterior$sr1out, nstore, )
375    sr2.holder <- matrix(posterior$sr2out, nstore, )
376    mr1.holder <- matrix(posterior$mr1out, nstore, )
377    mr2.holder <- matrix(posterior$mr2out, nstore, )
378
379    nsts <- apply(s.holder, 1, function(x) length(unique(x)))
380    mode.mod <- as.numeric(names(which.max(table(nsts))))
381    pr.chpt <- rowMeans(rbind(0, diff(t(s.holder)) != 0))
382    output <- mcmc(data=beta.holder, start=burnin+1, end=burnin + mcmc, thin=thin)
383      varnames(output)  <- sapply(c(1:K), function(i) { paste(xnames,
384    "_regime", i, sep = "")})
385    attr(output, "title") <- "HDPHSMMnegbin Posterior Sample"
386    attr(output, "formula") <- formula
387    attr(output, "y")       <- y
388    attr(output, "X")       <- X
389    attr(output, "K")       <- K
390    attr(output, "call")    <- cl
391    ## attr(output, "prob.state") <- ps.holder/nstore
392    attr(output, "pr.chpt") <- pr.chpt
393    attr(output, "s.store") <- s.holder
394    attr(output, "P.store") <- P.holder
395    attr(output, "rho.store") <- rho.holder
396    attr(output, "nu.store") <- nu.holder
397    ## attr(output, "tau1.store") <- tau1.holder
398    ## attr(output, "tau2.store") <- tau2.holder
399    ## attr(output, "comp1.store") <- comp1.holder
400    ## attr(output, "comp2.store") <- comp2.holder
401    ## attr(output, "sr1.store") <- sr1.holder
402    ## attr(output, "sr2.store") <- sr2.holder
403    ## attr(output, "mr1.store") <- mr1.holder
404    ## attr(output, "mr2.store") <- mr2.holder
405    attr(output, "rho.step") <- posterior$rhosizes
406    attr(output, "num.regimes") <- nsts
407    attr(output, "gamma.store") <- posterior$gammaout
408    attr(output, "omega.store") <- omega.holder
409    attr(output, "alpha.store") <- posterior$alphaout
410
411    return(output)
412  }
413
414
415## initial values of tau in MCMCnegbinChange
416"tau.negbin.initial" <- function(y) {
417  tau1 <- rep(NA, length(y))
418  tau2 <- rep(NA, length(y))
419  lambda.t        <-  exp(5)
420  for (t in 1:length(y)){
421    nt      <-  y[t]
422    if (nt==0) {
423      tau1[t]    <-  1 + rexp(1, lambda.t)
424      tau2[t] <- 0
425    }
426    else{
427      xi <- rexp(1, lambda.t)
428      tau2[t] <- rbeta(1, nt, 1)
429      tau1[t] <- 1 - tau2[t] + xi
430    }
431  }
432  return(list(tau1 = tau1, tau2 = tau2))
433}
434