1#################################################################### 2## test subject-level breaks from panel residuals 3## 4## written by Jong Hee Park 03/2009 5## modified and integrated with other codes by JHP 07/2011 6## fixed a starting.id and ending.id 7###################################################################### 8 9#' A Test for the Subject-level Break using a Unitivariate Linear Regression 10#' Model with Breaks 11#' 12#' testpanelSubjectBreak fits a unitivariate linear regression model with 13#' parametric breaks using panel residuals to test the existence of 14#' subject-level breaks in panel residuals. The details are discussed in Park 15#' (2011). 16#' 17#' 18#' \code{testpanelSubjectBreak} fits a univariate linear regression model for 19#' subject-level residuals from a panel model. The details are discussed in 20#' Park (2011). 21#' 22#' The model takes the following form: 23#' 24#' \deqn{e_{it} = \alpha_{im} + \varepsilon_{it}\;\; m = 1, \ldots, M} 25#' 26#' The errors are assumed to be time-varying at the subject level: 27#' 28#' \deqn{\varepsilon_{it} \sim \mathcal{N}(0, \sigma^2_{im})} 29#' 30#' We assume standard, semi-conjugate priors: 31#' 32#' \deqn{\beta \sim \mathcal{N}(b_0,B_0^{-1})} 33#' 34#' And: 35#' 36#' \deqn{\sigma^{-2} \sim \mathcal{G}amma(c_0/2, d_0/2)} 37#' 38#' Where \eqn{\beta} and \eqn{\sigma^{-2}} are assumed \emph{a priori} 39#' independent. 40#' 41#' And: 42#' 43#' \deqn{p_{mm} \sim \mathcal{B}eta(a, b),\;\; m = 1, \ldots, M} 44#' 45#' Where \eqn{M} is the number of states. 46#' 47#' OLS estimates are used for starting values. 48#' 49#' @param subject.id A numeric vector indicating the group number. It should 50#' start from 1. 51#' 52#' @param time.id A numeric vector indicating the time unit. It should start 53#' from 1. 54#' 55#' @param resid A vector of panel residuals. 56#' 57#' @param max.break An upper bound of break numbers for the test. 58#' 59#' @param minimum A minimum length of time series for the test. The test will 60#' skip a subject with a time series shorter than this. 61#' 62#' @param mcmc The number of MCMC iterations after burn-in. 63#' 64#' @param burnin The number of burn-in iterations for the sampler. 65#' 66#' @param thin The thinning interval used in the simulation. The number of 67#' MCMC iterations must be divisible by this value. 68#' 69#' @param verbose A switch which determines whether or not the progress of the 70#' sampler is printed to the screen. If \code{verbose} is greater than 0, the 71#' iteration number and the posterior density samples are printed to the screen 72#' every \code{verbose}th iteration. 73#' 74#' @param b0 The prior mean of the residual mean. 75#' 76#' @param B0 The prior precision of the residual variance 77#' 78#' @param c0 \eqn{c_0/2} is the shape parameter for the inverse Gamma 79#' prior on \eqn{\sigma^2}. The amount of information in the inverse 80#' Gamma prior is something like that from \eqn{c_0} pseudo-observations. 81#' 82#' @param d0 \eqn{d_0/2} is the scale parameter for the inverse Gamma 83#' prior on \eqn{\sigma^2}. 84#' 85#' @param a \eqn{a} is the shape1 beta prior for transition probabilities. 86#' By default, the expected duration is computed and corresponding a and b 87#' values are assigned. The expected duration is the sample period divided by 88#' the number of states. 89#' 90#' @param b \eqn{b} is the shape2 beta prior for transition probabilities. 91#' By default, the expected duration is computed and corresponding a and b 92#' values are assigned. The expected duration is the sample period divided by 93#' the number of states. 94#' 95#' @param seed The seed for the random number generator. If NA, current R 96#' system seed is used. 97#' 98#' @param Time Times of the observations. This will be used to find the time of 99#' the first observations in panel residuals. 100#' 101#' @param ps.out If ps.out == TRUE, state probabilities are exported. If the 102#' number of panel subjects is huge, users can turn it off to save memory. 103#' 104#' @param ... further arguments to be passed 105#' 106#' @return The returned object is a matrix containing log marginal likelihoods 107#' for all HMMs. The dimension of the returned object is the number of panel 108#' subjects by max.break + 1. If psout == TRUE, the returned object has an 109#' array attribute \code{psout} containing state probabilities for all HMMs. 110#' 111#' @export 112#' 113#' @references Jong Hee Park, 2012. ``Unified Method for Dynamic and 114#' Cross-Sectional Heterogeneity: Introducing Hidden Markov Panel Models.'' 115#' \emph{American Journal of Political Science}.56: 1040-1054. 116#' <doi: 10.1111/j.1540-5907.2012.00590.x> 117#' 118#' Siddhartha Chib. 1998. ``Estimation and comparison of multiple change-point 119#' models.'' \emph{Journal of Econometrics}. 86: 221-241. 120#' <doi: 10.1080/01621459.1995.10476635> 121#' 122#' @keywords models 123#' 124#' @examples 125#' 126#' \dontrun{ 127#' set.seed(1974) 128#' N <- 30 129#' T <- 80 130#' NT <- N*T 131#' 132#' ## true parameter values 133#' true.beta <- c(1, 1) 134#' true.sigma <- 3 135#' x1 <- rnorm(NT) 136#' x2 <- runif(NT, 2, 4) 137#' 138#' ## group-specific breaks 139#' break.point = rep(T/2, N); break.sigma=c(rep(1, N)); 140#' break.list <- rep(1, N) 141#' 142#' X <- as.matrix(cbind(x1, x2), NT, ); 143#' y <- rep(NA, NT) 144#' id <- rep(1:N, each=NT/N) 145#' K <- ncol(X); 146#' true.beta <- as.matrix(true.beta, K, 1) 147#' 148#' ## compute the break probability 149#' ruler <- c(1:T) 150#' W.mat <- matrix(NA, T, N) 151#' for (i in 1:N){ 152#' W.mat[, i] <- pnorm((ruler-break.point[i])/break.sigma[i]) 153#' } 154#' Weight <- as.vector(W.mat) 155#' 156#' ## draw time-varying individual effects and sample y 157#' j = 1 158#' true.sigma.alpha <- 30 159#' true.alpha1 <- true.alpha2 <- rep(NA, N) 160#' for (i in 1:N){ 161#' Xi <- X[j:(j+T-1), ] 162#' true.mean <- Xi %*% true.beta 163#' weight <- Weight[j:(j+T-1)] 164#' true.alpha1[i] <- rnorm(1, 0, true.sigma.alpha) 165#' true.alpha2[i] <- -1*true.alpha1[i] 166#' y[j:(j+T-1)] <- ((1-weight)*true.mean + (1-weight)*rnorm(T, 0, true.sigma) + 167#' (1-weight)*true.alpha1[i]) + 168#' (weight*true.mean + weight*rnorm(T, 0, true.sigma) + weight*true.alpha2[i]) 169#' j <- j + T 170#' } 171#' 172#' ## extract the standardized residuals from the OLS with fixed-effects 173#' FEols <- lm(y ~ X + as.factor(id) -1 ) 174#' resid.all <- rstandard(FEols) 175#' time.id <- rep(1:80, N) 176#' 177#' ## model fitting 178#' G <- 1000 179#' BF <- testpanelSubjectBreak(subject.id=id, time.id=time.id, 180#' resid= resid.all, max.break=3, minimum = 10, 181#' mcmc=G, burnin = G, thin=1, verbose=G, 182#' b0=0, B0=1/100, c0=2, d0=2, Time = time.id) 183#' 184#' ## estimated break numbers 185#' ## thresho 186#' estimated.breaks <- make.breaklist(BF, threshold=3) 187#' 188#' ## print all posterior model probabilities 189#' print(attr(BF, "model.prob")) 190#' } 191#' 192"testpanelSubjectBreak" <- 193 function(subject.id, time.id, resid, max.break=2, minimum = 10, 194 mcmc=1000, burnin=1000, thin=1, verbose=0, 195 b0, B0, c0, d0, a = NULL, b = NULL, seed = NA, 196 Time = NULL, ps.out = FALSE, ...){ 197 ## seeds 198 seeds <- form.seeds(seed) 199 lecuyer <- seeds[[1]] 200 seed.array <- seeds[[2]] 201 lecuyer.stream <- seeds[[3]] 202 203 ## Data 204 N <- length(subject.id) 205 206 ## groupinfo matrix 207 ## col1: subj ID, col2: offset (first time C indexing), col3: #time periods 208 if (min(subject.id) != 1){ 209 stop("subject.id should start 1!") 210 } 211 if (min(time.id) != 1){ 212 stop("time.id should start 1!") 213 } 214 if (is.null(Time)){ 215 Time <- rep(N, 1) 216 } 217 NC <- length(unique(subject.id)) 218 time.list <- as.numeric(table(subject.id)) 219 220 ## Make a residula list 221 resid.list <- as.list(rep(NA, NC)) 222 start <- 1; end <- 0 223 for (i in 1:NC){ 224 end <- start + time.list[i] - 1 225 resid.list[[i]] <- ts(resid[start:end], start=Time[start]) 226 start <- end + 1 227 } 228 ## Do the break analysis 229 BFout <- matrix(NA, NC, max.break + 1) 230 if (ps.out ==TRUE){ 231 psout <- NULL 232 } 233 else { 234 psout <- array(NA, c(max(time.list), sum(2:(max.break+1)), NC)) 235 } 236 for (i in 1:NC){ 237 residual <- resid.list[[i]] 238 nk <- length(residual) 239 out <- as.list(rep(NA, max.break)) 240 if(nk > minimum){ 241 for (k in 0:max.break){ 242 out[[k+1]] <- MCMCresidualBreakAnalysis(residual, m=k, 243 b0=b0, B0=B0, c0=c0, d0=d0, a=a, b=b, 244 burnin=burnin, mcmc=mcmc, thin=thin, verbose=verbose, 245 marginal.likelihood="Chib95") 246 if (ps.out ==TRUE&k>0){ 247 if(k==1){ 248 start <- 1 249 } 250 else{ 251 start <- sum(2:k)+1 252 } 253 probstate <- attr(out[[k+1]], "prob.state") 254 psout[1:length(probstate[,1]), start:(start+k), i] <- probstate 255 } 256 ## if no convergence diagnostic 257 BFout[i, k+1] <- attr(out[[k+1]], "logmarglike") 258 } 259 } 260 if (verbose > 0){ 261 cat("\n ------------------------------------------------------------- ") 262 cat("\n Break analysis for subject=", i, "is just finished! \n") 263 } 264 } 265 if (ps.out ==TRUE){ 266 attr(BFout, "psout") <- psout 267 } 268 model.prob.mat <- matrix(NA, NC, max.break + 1) 269 for (i in 1:NC){ 270 model.prob <- exp(BFout[i, ])/sum(exp(BFout[i, ])) 271 winner <- which.max(model.prob) 272 if (verbose > 0){ 273 cat("\nPr(no residual break) for subject", i, "=", 274 model.prob[1]) 275 } 276 model.prob.mat[i,] <- model.prob 277 } 278 attr(BFout, "model.prob") <- model.prob.mat 279 return(BFout) 280 } 281