1####################################################################
2## test subject-level breaks from panel residuals
3##
4## written by Jong Hee Park 03/2009
5## modified and integrated with other codes by JHP 07/2011
6## fixed a starting.id and ending.id
7######################################################################
8
9#' A Test for the Subject-level Break using a Unitivariate Linear Regression
10#' Model with Breaks
11#'
12#' testpanelSubjectBreak fits a unitivariate linear regression model with
13#' parametric breaks using panel residuals to test the existence of
14#' subject-level breaks in panel residuals. The details are discussed in Park
15#' (2011).
16#'
17#'
18#' \code{testpanelSubjectBreak} fits a univariate linear regression model for
19#' subject-level residuals from a panel model.  The details are discussed in
20#' Park (2011).
21#'
22#' The model takes the following form:
23#'
24#' \deqn{e_{it} = \alpha_{im} + \varepsilon_{it}\;\; m = 1, \ldots, M}
25#'
26#' The errors are assumed to be time-varying at the subject level:
27#'
28#' \deqn{\varepsilon_{it} \sim \mathcal{N}(0, \sigma^2_{im})}
29#'
30#' We assume standard, semi-conjugate priors:
31#'
32#' \deqn{\beta \sim \mathcal{N}(b_0,B_0^{-1})}
33#'
34#' And:
35#'
36#' \deqn{\sigma^{-2} \sim \mathcal{G}amma(c_0/2, d_0/2)}
37#'
38#' Where \eqn{\beta} and \eqn{\sigma^{-2}} are assumed \emph{a priori}
39#' independent.
40#'
41#' And:
42#'
43#' \deqn{p_{mm} \sim \mathcal{B}eta(a, b),\;\; m = 1, \ldots, M}
44#'
45#' Where \eqn{M} is the number of states.
46#'
47#' OLS estimates are used for starting values.
48#'
49#' @param subject.id A numeric vector indicating the group number. It should
50#' start from 1.
51#'
52#' @param time.id A numeric vector indicating the time unit. It should start
53#' from 1.
54#'
55#' @param resid A vector of panel residuals.
56#'
57#' @param max.break An upper bound of break numbers for the test.
58#'
59#' @param minimum A minimum length of time series for the test. The test will
60#' skip a subject with a time series shorter than this.
61#'
62#' @param mcmc The number of MCMC iterations after burn-in.
63#'
64#' @param burnin The number of burn-in iterations for the sampler.
65#'
66#' @param thin The thinning interval used in the simulation.  The number of
67#' MCMC iterations must be divisible by this value.
68#'
69#' @param verbose A switch which determines whether or not the progress of the
70#' sampler is printed to the screen.  If \code{verbose} is greater than 0, the
71#' iteration number and the posterior density samples are printed to the screen
72#' every \code{verbose}th iteration.
73#'
74#' @param b0 The prior mean of the residual mean.
75#'
76#' @param B0 The prior precision of the residual variance
77#'
78#' @param c0 \eqn{c_0/2} is the shape parameter for the inverse Gamma
79#' prior on \eqn{\sigma^2}. The amount of information in the inverse
80#' Gamma prior is something like that from \eqn{c_0} pseudo-observations.
81#'
82#' @param d0 \eqn{d_0/2} is the scale parameter for the inverse Gamma
83#' prior on \eqn{\sigma^2}.
84#'
85#' @param a \eqn{a} is the shape1 beta prior for transition probabilities.
86#' By default, the expected duration is computed and corresponding a and b
87#' values are assigned. The expected duration is the sample period divided by
88#' the number of states.
89#'
90#' @param b \eqn{b} is the shape2 beta prior for transition probabilities.
91#' By default, the expected duration is computed and corresponding a and b
92#' values are assigned. The expected duration is the sample period divided by
93#' the number of states.
94#'
95#' @param seed The seed for the random number generator.  If NA, current R
96#' system seed is used.
97#'
98#' @param Time Times of the observations. This will be used to find the time of
99#' the first observations in panel residuals.
100#'
101#' @param ps.out If ps.out == TRUE, state probabilities are exported. If the
102#' number of panel subjects is huge, users can turn it off to save memory.
103#'
104#' @param ... further arguments to be passed
105#'
106#' @return The returned object is a matrix containing log marginal likelihoods
107#' for all HMMs.  The dimension of the returned object is the number of panel
108#' subjects by max.break + 1.  If psout == TRUE, the returned object has an
109#' array attribute \code{psout} containing state probabilities for all HMMs.
110#'
111#' @export
112#'
113#' @references Jong Hee Park, 2012. ``Unified Method for Dynamic and
114#' Cross-Sectional Heterogeneity: Introducing Hidden Markov Panel Models.''
115#' \emph{American Journal of Political Science}.56: 1040-1054.
116#' <doi: 10.1111/j.1540-5907.2012.00590.x>
117#'
118#' Siddhartha Chib. 1998. ``Estimation and comparison of multiple change-point
119#' models.'' \emph{Journal of Econometrics}. 86: 221-241.
120#' <doi: 10.1080/01621459.1995.10476635>
121#'
122#' @keywords models
123#'
124#' @examples
125#'
126#' \dontrun{
127#'   set.seed(1974)
128#'   N <- 30
129#'   T <- 80
130#'   NT <- N*T
131#'
132#'   ## true parameter values
133#'   true.beta <- c(1, 1)
134#'   true.sigma <- 3
135#'   x1 <- rnorm(NT)
136#'   x2 <- runif(NT, 2, 4)
137#'
138#'   ## group-specific breaks
139#'   break.point = rep(T/2, N); break.sigma=c(rep(1, N));
140#'   break.list <- rep(1, N)
141#'
142#'   X <- as.matrix(cbind(x1, x2), NT, );
143#'   y <- rep(NA, NT)
144#'   id  <-  rep(1:N, each=NT/N)
145#'   K <-  ncol(X);
146#'   true.beta <- as.matrix(true.beta, K, 1)
147#'
148#'   ## compute the break probability
149#'   ruler <- c(1:T)
150#'   W.mat <- matrix(NA, T, N)
151#'   for (i in 1:N){
152#'     W.mat[, i] <- pnorm((ruler-break.point[i])/break.sigma[i])
153#'   }
154#'   Weight <- as.vector(W.mat)
155#'
156#'   ## draw time-varying individual effects and sample y
157#'   j = 1
158#'   true.sigma.alpha <- 30
159#'   true.alpha1 <- true.alpha2 <- rep(NA, N)
160#'   for (i in 1:N){
161#'     Xi <- X[j:(j+T-1), ]
162#'     true.mean <- Xi  %*% true.beta
163#'     weight <- Weight[j:(j+T-1)]
164#'     true.alpha1[i] <- rnorm(1, 0, true.sigma.alpha)
165#'     true.alpha2[i] <- -1*true.alpha1[i]
166#'     y[j:(j+T-1)] <- ((1-weight)*true.mean + (1-weight)*rnorm(T, 0, true.sigma) +
167#'     		    (1-weight)*true.alpha1[i]) +
168#'     		    (weight*true.mean + weight*rnorm(T, 0, true.sigma) + weight*true.alpha2[i])
169#'     j <- j + T
170#'   }
171#'
172#'   ## extract the standardized residuals from the OLS with fixed-effects
173#'   FEols <- lm(y ~ X + as.factor(id) -1 )
174#'   resid.all <- rstandard(FEols)
175#'   time.id <- rep(1:80, N)
176#'
177#'   ## model fitting
178#'   G <- 1000
179#'   BF <- testpanelSubjectBreak(subject.id=id, time.id=time.id,
180#'          resid= resid.all, max.break=3, minimum = 10,
181#'          mcmc=G, burnin = G, thin=1, verbose=G,
182#'          b0=0, B0=1/100, c0=2, d0=2, Time = time.id)
183#'
184#'   ## estimated break numbers
185#'   ## thresho
186#'   estimated.breaks <- make.breaklist(BF, threshold=3)
187#'
188#'   ## print all posterior model probabilities
189#'   print(attr(BF, "model.prob"))
190#' }
191#'
192"testpanelSubjectBreak" <-
193  function(subject.id, time.id, resid, max.break=2, minimum = 10,
194           mcmc=1000, burnin=1000, thin=1, verbose=0,
195           b0, B0, c0, d0, a = NULL, b = NULL, seed = NA,
196           Time = NULL, ps.out = FALSE, ...){
197    ## seeds
198    seeds <- form.seeds(seed)
199    lecuyer <- seeds[[1]]
200    seed.array <- seeds[[2]]
201    lecuyer.stream <- seeds[[3]]
202
203    ## Data
204    N <- length(subject.id)
205
206    ## groupinfo matrix
207    ## col1: subj ID, col2: offset (first time C indexing), col3: #time periods
208    if (min(subject.id) != 1){
209      stop("subject.id should start 1!")
210    }
211    if (min(time.id) != 1){
212      stop("time.id should start 1!")
213    }
214    if (is.null(Time)){
215      Time <- rep(N, 1)
216    }
217    NC <- length(unique(subject.id))
218    time.list <- as.numeric(table(subject.id))
219
220    ## Make a residula list
221    resid.list <- as.list(rep(NA, NC))
222    start <- 1; end <- 0
223    for (i in 1:NC){
224      end <- start + time.list[i] - 1
225      resid.list[[i]] <- ts(resid[start:end], start=Time[start])
226      start <- end + 1
227    }
228    ## Do the break analysis
229    BFout <- matrix(NA, NC, max.break + 1)
230    if (ps.out ==TRUE){
231      psout <- NULL
232    }
233    else {
234      psout <- array(NA, c(max(time.list), sum(2:(max.break+1)), NC))
235    }
236    for (i in 1:NC){
237      residual <- resid.list[[i]]
238      nk <- length(residual)
239      out <- as.list(rep(NA, max.break))
240      if(nk > minimum){
241        for (k in 0:max.break){
242          out[[k+1]] <- MCMCresidualBreakAnalysis(residual, m=k,
243                                                  b0=b0, B0=B0, c0=c0, d0=d0, a=a, b=b,
244                                                  burnin=burnin, mcmc=mcmc, thin=thin, verbose=verbose,
245                                                  marginal.likelihood="Chib95")
246          if (ps.out ==TRUE&k>0){
247            if(k==1){
248              start <- 1
249            }
250            else{
251              start <- sum(2:k)+1
252            }
253            probstate <- attr(out[[k+1]], "prob.state")
254            psout[1:length(probstate[,1]), start:(start+k), i] <- probstate
255          }
256          ## if no convergence diagnostic
257          BFout[i, k+1] <- attr(out[[k+1]], "logmarglike")
258        }
259      }
260      if (verbose > 0){
261        cat("\n ------------------------------------------------------------- ")
262        cat("\n Break analysis for subject=", i, "is just finished! \n")
263      }
264    }
265    if (ps.out ==TRUE){
266      attr(BFout, "psout") <- psout
267    }
268    model.prob.mat <- matrix(NA, NC, max.break + 1)
269    for (i in 1:NC){
270      model.prob <- exp(BFout[i, ])/sum(exp(BFout[i, ]))
271      winner <- which.max(model.prob)
272      if (verbose > 0){
273        cat("\nPr(no residual break) for subject", i, "=",
274            model.prob[1])
275      }
276      model.prob.mat[i,] <-  model.prob
277    }
278    attr(BFout, "model.prob") <- model.prob.mat
279    return(BFout)
280  }
281