1#' Importance Sampling of Exponential Family State Space Model 2#' 3#' Function \code{importanceSSM} simulates states or signals of the exponential 4#' family state space model conditioned with the observations, returning the 5#' simulated samples of the states/signals with the corresponding importance 6#' weights. 7#' 8#' Function can use two antithetic variables, one for location and other for 9#' scale, so output contains four blocks of simulated values which correlate 10#' which each other (ith block correlates negatively with (i+1)th block, and 11#' positively with (i+2)th block etc.). 12#' 13#' @export 14#' @param model Exponential family state space model of class \code{SSModel}. 15#' @param type What to simulate, \code{"states"} or \code{"signals"}. Default is 16#' \code{"states"} 17#' @param filtered Simulate from \eqn{p(\alpha_t|y_{t-1},...,y_1)} instead of 18#' \eqn{p(\alpha|y)}. Note that for large models this can be very slow. Default is FALSE. 19#' @param nsim Number of independent samples. Default is 1000. 20#' @param save.model Return the original model with the samples. Default is 21#' FALSE. 22#' @param theta Initial values for the conditional mode theta. 23#' @param antithetics Logical. If TRUE, two antithetic variables are used in 24#' simulations, one for location and another for scale. Default is FALSE. 25#' @param maxiter Maximum number of iterations used in linearisation. Default is 26#' 50. 27#' @param expected Logical value defining the approximation of H_t in case of Gamma 28#' and negative binomial distribution. Default is \code{FALSE} which matches the 29#' algorithm of Durbin & Koopman (1997), whereas \code{TRUE} uses the expected value 30#' of observations in the equations, leading to results which match with \code{glm} (where applicable). 31#' The latter case was the default behaviour of KFAS before version 1.3.8. 32#' Essentially this is the difference between observed and expected information. 33#' @param H_tol Tolerance parameter for check \code{max(H) > H_tol}, which suggests that the approximation 34#' converged to degenerate case with near zero signal-to-noise ratio. Default is very generous 1e15. 35#' @return A list containing elements 36#' \item{samples}{Simulated samples. } 37#' \item{weights}{Importance weights. } 38#' \item{model}{Original model in case of \code{save.model==TRUE}.} 39#' @examples 40#' data("sexratio") 41#' model <- SSModel(Male ~ SSMtrend(1, Q = list(NA)), u = sexratio[,"Total"], data = sexratio, 42#' distribution = "binomial") 43#' fit <- fitSSM(model, inits = -15, method = "BFGS") 44#' fit$model$Q #1.107652e-06 45 46#' # Computing confidence intervals for sex ratio 47#' # Uses importance sampling on response scale (1000 samples with antithetics) 48#' set.seed(1) 49#' imp <- importanceSSM(fit$model, nsim = 250, antithetics = TRUE) 50#' sexratio.smooth <- numeric(length(model$y)) 51#' sexratio.ci <- matrix(0, length(model$y), 2) 52#' w <- imp$w/sum(imp$w) 53#' for(i in 1:length(model$y)){ 54#' sexr <- exp(imp$sample[i,1,]) 55#' sexratio.smooth[i]<-sum(sexr*w) 56#' oo <- order(sexr) 57#' sexratio.ci[i,] <- c(sexr[oo][which.min(abs(cumsum(w[oo]) - 0.05))], 58#' sexr[oo][which.min(abs(cumsum(w[oo]) - 0.95))]) 59#' } 60#' 61#' \dontrun{ 62#' # Filtered estimates 63#' impf <- importanceSSM(fit$model, nsim = 250, antithetics = TRUE,filtered=TRUE) 64#' sexratio.filter <- rep(NA,length(model$y)) 65#' sexratio.fci <- matrix(NA, length(model$y), 2) 66#' w <- impf$w/rowSums(impf$w) 67#' for(i in 2:length(model$y)){ 68#' sexr <- exp(impf$sample[i,1,]) 69#' sexratio.filter[i] <- sum(sexr*w[i,]) 70#' oo<-order(sexr) 71#' sexratio.fci[i,] <- c(sexr[oo][which.min(abs(cumsum(w[i,oo]) - 0.05))], 72#' sexr[oo][which.min(abs(cumsum(w[i,oo]) - 0.95))]) 73#' } 74#' 75#' ts.plot(cbind(sexratio.smooth,sexratio.ci,sexratio.filter,sexratio.fci), 76#' col=c(1,1,1,2,2,2),lty=c(1,2,2,1,2,2)) 77#' } 78importanceSSM <- function(model, type = c("states", "signals"), 79 filtered = FALSE, nsim = 1000, save.model = FALSE, theta, 80 antithetics = FALSE, maxiter = 50, expected = FALSE, H_tol = 1e15) { 81 82 if (all(model$distribution == "gaussian")) { 83 stop("Model is completely Gaussian, use simulateSSM instead. ") 84 } 85 if(maxiter<1) 86 stop("Argument maxiter must a positive integer. ") 87 if(nsim<1) 88 stop("Argument nsim must a positive integer. ") 89 # Check that the model object is of proper form 90 is.SSModel(model, na.check = TRUE, return.logical = FALSE) 91 if (!is.logical(expected)) 92 stop("Argument expected should be logical. ") 93 expected <- as.integer(expected) 94 p <- attr(model, "p") 95 m <- attr(model, "m") 96 k <- attr(model, "k") 97 n <- attr(model, "n") 98 tv <- attr(model, "tv") 99 ymiss <- is.na(model$y) 100 storage.mode(ymiss) <- "integer" 101 102 # initial values for linear predictor theta 103 if (missing(theta)) { 104 theta <- initTheta(model$y, model$u, model$distribution) 105 } else theta <- array(theta, dim = c(n, p)) 106 107 # generate standard normal variables for importance sampling 108 simtmp <- simHelper(model, nsim, antithetics) 109 sim.what <- which(c("epsilon", "eta", "disturbances", "states", "signals", "observations") == 110 match.arg(arg = type, choices = c("states", "signals"))) 111 simdim <- as.integer(switch(sim.what, p, k, p + k, m, p, p)) 112 if (!filtered) { 113 out <- .Fortran(fisample, NAOK = TRUE, model$y, ymiss, tv, model$Z, 114 model$T, model$R, model$Q, model$a1, model$P1, model$P1inf, model$u, 115 dist = pmatch(x = model$distribution, 116 table = c("gaussian", "poisson", "binomial", "gamma", "negative binomial"), 117 duplicates.ok = TRUE), 118 p, n, m, k, theta = theta, maxiter = as.integer(maxiter), 119 simtmp$nNonzeroP1inf, 1e-08, simtmp$nNonzeroP1, as.integer(nsim), 120 simtmp$epsplus, simtmp$etaplus, simtmp$aplus1, simtmp$c2, model$tol, 121 info = integer(1), as.integer(antithetics), 122 w = numeric(3 * nsim * antithetics + nsim), 123 sim = array(0, c(simdim, n, 3 * nsim * antithetics + nsim)), sim.what, simdim, 124 expected, H_tol = H_tol) 125 } else { 126 out <- .Fortran(fisamplefilter, NAOK = TRUE, model$y, ymiss, as.integer(tv), 127 model$Z, model$T, model$R, model$Q, model$a1, model$P1, model$P1inf,model$u, 128 dist = pmatch(x = model$distribution, 129 table = c("gaussian", "poisson", "binomial", "gamma", "negative binomial"), 130 duplicates.ok = TRUE), 131 p, n, m, k, theta = theta, maxiter = as.integer(maxiter), 132 simtmp$nNonzeroP1inf, 1e-08, simtmp$nNonzeroP1, as.integer(nsim), 133 simtmp$epsplus, simtmp$etaplus, simtmp$aplus1, simtmp$c2, 134 model$tol, info = integer(1), as.integer(antithetics), 135 w = array(0, c(n, 3 * nsim * antithetics + nsim)), 136 sim = array(0, c(simdim, n, 3 * nsim * antithetics + nsim)), sim.what, simdim, 137 expected, H_tol = H_tol) 138 } 139 if(out$info!=0){ 140 switch(as.character(out$info), 141 "-5" = warning(paste0("Gaussian approximation converged to a degenerate case with max(H) = ", out$H_tol, ".")), 142 "-3" = stop("Couldn't compute LDL decomposition of P1."), 143 "-2" = stop("Couldn't compute LDL decomposition of Q."), 144 "1" = stop("Gaussian approximation failed due to non-finite value in linear predictor."), 145 "2" = stop("Gaussian approximation failed due to non-finite value of p(theta|y)."), 146 "3" = warning("Maximum number of iterations reached, the approximation did not converge.") 147 ) 148 } 149 150 out <- list(samples = aperm(out$sim, c(2, 1, 3)), weights = out$w) 151 if (save.model) 152 out$model <- model 153 out 154} 155