#' Importance Sampling of Exponential Family State Space Model
#'
#' Function \code{importanceSSM} simulates states or signals of the exponential
#' family state space model conditional on the observations, returning the
#' simulated samples of the states/signals together with the corresponding
#' importance weights.
#'
#' The function can use two antithetic variables, one for location and one for
#' scale, so that the output contains four blocks of simulated values which
#' correlate with each other (the ith block correlates negatively with the
#' (i+1)th block, and positively with the (i+2)th block, etc.).
#'
#' @export
#' @param model Exponential family state space model of class \code{SSModel}.
#' @param type What to simulate, \code{"states"} or \code{"signals"}. Default is
#'   \code{"states"}.
#' @param filtered Simulate from \eqn{p(\alpha_t|y_{t-1},...,y_1)} instead of
#'   \eqn{p(\alpha|y)}. Note that for large models this can be very slow.
#'   Default is \code{FALSE}.
#' @param nsim Number of independent samples. Default is 1000.
#' @param save.model Return the original model with the samples. Default is
#'   \code{FALSE}.
#' @param theta Initial values for the conditional mode theta.
#' @param antithetics Logical. If \code{TRUE}, two antithetic variables are used in
#'   simulations, one for location and another for scale. Default is \code{FALSE}.
#' @param maxiter Maximum number of iterations used in linearisation. Default is
#'   50.
#' @param expected Logical value defining the approximation of H_t in case of Gamma
#' and negative binomial distributions. Default is \code{FALSE}, which matches the
#' algorithm of Durbin & Koopman (1997), whereas \code{TRUE} uses the expected value
#' of the observations in the equations, leading to results which match with
#' \code{glm} (where applicable). The latter case was the default behaviour of KFAS
#' before version 1.3.8. Essentially this is the difference between observed and
#' expected information.
#' @param H_tol Tolerance parameter for the check \code{max(H) > H_tol}, which
#' suggests that the approximation converged to a degenerate case with a near-zero
#' signal-to-noise ratio. The default is a very generous 1e15.
#' @return A list containing elements
#' \item{samples}{Simulated samples.}
#' \item{weights}{Importance weights.}
#' \item{model}{Original model in case of \code{save.model = TRUE}.}
#' @examples
#' data("sexratio")
#' model <- SSModel(Male ~ SSMtrend(1, Q = list(NA)), u = sexratio[,"Total"], data = sexratio,
#'                 distribution = "binomial")
#' fit <- fitSSM(model, inits = -15, method = "BFGS")
#' fit$model$Q #1.107652e-06
#'
#' # Computing confidence intervals for sex ratio
#' # Uses importance sampling on response scale
#' # (nsim = 250 with antithetics = TRUE gives 4 * 250 = 1000 draws)
#' set.seed(1)
#' imp <- importanceSSM(fit$model, nsim = 250, antithetics = TRUE)
#' sexratio.smooth <- numeric(length(model$y))
#' sexratio.ci <- matrix(0, length(model$y), 2)
#' w <- imp$w/sum(imp$w)
#' for(i in 1:length(model$y)){
#'   sexr <- exp(imp$sample[i, 1, ])
#'   sexratio.smooth[i] <- sum(sexr * w)
#'   oo <- order(sexr)
#'   sexratio.ci[i,] <- c(sexr[oo][which.min(abs(cumsum(w[oo]) - 0.05))],
#'                        sexr[oo][which.min(abs(cumsum(w[oo]) - 0.95))])
#' }
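#' # The sample array should be arranged as (time, variable, draw); with
#' # antithetics = TRUE there are 4 * nsim draws in total, and the weights
#' # vector contains one (unnormalised) importance weight per draw.
#' dim(imp$samples)
#' length(imp$weights)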
#'
#' \dontrun{
#' # Filtered estimates
#' impf <- importanceSSM(fit$model, nsim = 250, antithetics = TRUE, filtered = TRUE)
#' sexratio.filter <- rep(NA, length(model$y))
#' sexratio.fci <- matrix(NA, length(model$y), 2)
#' w <- impf$w/rowSums(impf$w)
#' for(i in 2:length(model$y)){
#'   sexr <- exp(impf$sample[i, 1, ])
#'   sexratio.filter[i] <- sum(sexr * w[i, ])
#'   oo <- order(sexr)
#'   sexratio.fci[i,] <- c(sexr[oo][which.min(abs(cumsum(w[i, oo]) - 0.05))],
#'                         sexr[oo][which.min(abs(cumsum(w[i, oo]) - 0.95))])
#' }
#'
#' ts.plot(cbind(sexratio.smooth, sexratio.ci, sexratio.filter, sexratio.fci),
#'         col = c(1, 1, 1, 2, 2, 2), lty = c(1, 2, 2, 1, 2, 2))
#' }
importanceSSM <- function(model, type = c("states", "signals"),
  filtered = FALSE, nsim = 1000, save.model = FALSE, theta,
  antithetics = FALSE, maxiter = 50, expected = FALSE, H_tol = 1e15) {

  if (all(model$distribution == "gaussian")) {
    stop("Model is completely Gaussian, use simulateSSM instead.")
  }
  if (maxiter < 1)
    stop("Argument maxiter must be a positive integer.")
  if (nsim < 1)
    stop("Argument nsim must be a positive integer.")
  # Check that the model object is of proper form
  is.SSModel(model, na.check = TRUE, return.logical = FALSE)
  if (!is.logical(expected))
    stop("Argument expected should be logical.")
  expected <- as.integer(expected)
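  # model dimensions and time-variability indicators stored as attributes
  # of the SSModel object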
  p <- attr(model, "p")
  m <- attr(model, "m")
  k <- attr(model, "k")
  n <- attr(model, "n")
  tv <- attr(model, "tv")
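  # indicator matrix of missing observations, passed to Fortran as integers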
  ymiss <- is.na(model$y)
  storage.mode(ymiss) <- "integer"

  # initial values for linear predictor theta
  if (missing(theta)) {
    theta <- initTheta(model$y, model$u, model$distribution)
  } else theta <- array(theta, dim = c(n, p))

  # generate standard normal variables for importance sampling
  simtmp <- simHelper(model, nsim, antithetics)
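  # map the requested type to the internal simulation code and the dimension
  # of the simulated objects (states: m, signals: p)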
  sim.what <- which(c("epsilon", "eta", "disturbances", "states", "signals", "observations") ==
      match.arg(arg = type, choices = c("states", "signals")))
  simdim <- as.integer(switch(sim.what, p, k, p + k, m, p, p))
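  # call the Fortran routines: smoothing-based importance sampling by default,
  # or the filtered version when filtered = TRUE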
  if (!filtered) {
    out <- .Fortran(fisample, NAOK = TRUE, model$y, ymiss, tv, model$Z,
      model$T, model$R, model$Q, model$a1, model$P1, model$P1inf, model$u,
      dist = pmatch(x = model$distribution,
        table = c("gaussian", "poisson", "binomial", "gamma", "negative binomial"),
        duplicates.ok = TRUE),
      p, n, m, k, theta = theta, maxiter = as.integer(maxiter),
      simtmp$nNonzeroP1inf, 1e-08, simtmp$nNonzeroP1, as.integer(nsim),
      simtmp$epsplus, simtmp$etaplus, simtmp$aplus1, simtmp$c2, model$tol,
      info = integer(1), as.integer(antithetics),
      w = numeric(3 * nsim * antithetics + nsim),
      sim = array(0, c(simdim, n, 3 * nsim * antithetics + nsim)), sim.what, simdim,
      expected, H_tol = H_tol)
  } else {
    out <- .Fortran(fisamplefilter, NAOK = TRUE, model$y, ymiss, as.integer(tv),
      model$Z, model$T, model$R, model$Q, model$a1, model$P1, model$P1inf, model$u,
      dist = pmatch(x = model$distribution,
        table = c("gaussian", "poisson", "binomial", "gamma", "negative binomial"),
        duplicates.ok = TRUE),
      p, n, m, k, theta = theta, maxiter = as.integer(maxiter),
      simtmp$nNonzeroP1inf, 1e-08, simtmp$nNonzeroP1, as.integer(nsim),
      simtmp$epsplus, simtmp$etaplus, simtmp$aplus1, simtmp$c2,
      model$tol, info = integer(1), as.integer(antithetics),
      w = array(0, c(n, 3 * nsim * antithetics + nsim)),
      sim = array(0, c(simdim, n, 3 * nsim * antithetics + nsim)), sim.what, simdim,
      expected, H_tol = H_tol)
  }
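  # translate nonzero info codes returned by the Fortran routine into
  # R warnings and errors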
  if (out$info != 0) {
    switch(as.character(out$info),
      "-5" = warning(paste0("Gaussian approximation converged to a degenerate case with max(H) = ", out$H_tol, ".")),
      "-3" = stop("Couldn't compute LDL decomposition of P1."),
      "-2" = stop("Couldn't compute LDL decomposition of Q."),
      "1" = stop("Gaussian approximation failed due to non-finite value in linear predictor."),
      "2" = stop("Gaussian approximation failed due to non-finite value of p(theta|y)."),
      "3" = warning("Maximum number of iterations reached, the approximation did not converge.")
    )
  }

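  # permute simulated samples to (time, variable, draw) order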
  out <- list(samples = aperm(out$sim, c(2, 1, 3)), weights = out$w)
  if (save.model)
    out$model <- model
  out
}