1 //////////////////////////////////////////////////////////////////////////
2 // cMCMCnegbin.cc is C++ code to estimate a Negative
3 // Binomial regression model
4 //
5 // Matthew Blackwell
6 // Department of Government
7 // Harvard University
8 // mblackwell@gov.harvard.edu
9
10 // Written 05/19/2017
11 //////////////////////////////////////////////////////////////////////////
12
13 #ifndef CMCMCNEGBIN_CC
14 #define CMCMCNEGBIN_CC
15
16 #include<vector>
17 #include<algorithm>
18
19 #include "MCMCrng.h"
20 #include "MCMCfcds.h"
21 #include "matrix.h"
22 #include "distributions.h"
23 #include "stat.h"
24 #include "la.h"
25 #include "ide.h"
26 #include "smath.h"
27 #include "rng.h"
28 #include "mersenne.h"
29 #include "lecuyer.h"
30
31 #include "MCMCnbutil.h"
32
33 #include <R.h>
34 #include <R_ext/Utils.h>
35
36 using namespace std;
37 using namespace scythe;
38
39
40 ////////////////////////////////////////////
41 // MCMCnegbinReg implementation.
42 ////////////////////////////////////////////
43 template <typename RNGTYPE>
MCMCnegbinReg_impl(rng<RNGTYPE> & stream,double * betaout,double * nuout,double * rhoout,double * tau1out,double * tau2out,int * comp1out,int * comp2out,double * sr1out,double * sr2out,double * mr1out,double * mr2out,double * rhosizes,const double * Ydata,const int * Yrow,const int * Ycol,const double * Xdata,const int * Xrow,const int * Xcol,const int * burnin,const int * mcmc,const int * thin,const int * verbose,const double * betastart,const double * nustart,const double * rhostart,const double * tau1start,const double * tau2start,const double * component1start,const double * e,const double * f,const double * g,const double * rhostepdata,const double * b0data,const double * B0data,double * logmarglikeholder,double * loglikeholder,const int * chib)44 void MCMCnegbinReg_impl(rng<RNGTYPE>& stream,
45 double *betaout,
46 double *nuout,
47 double *rhoout,
48 double *tau1out,
49 double *tau2out,
50 int *comp1out,
51 int *comp2out,
52 double *sr1out,
53 double *sr2out,
54 double *mr1out,
55 double *mr2out,
56 double *rhosizes,
57 const double *Ydata,
58 const int *Yrow,
59 const int *Ycol,
60 const double *Xdata,
61 const int *Xrow,
62 const int *Xcol,
63 const int *burnin,
64 const int *mcmc,
65 const int *thin,
66 const int *verbose,
67 const double *betastart,
68 const double *nustart,
69 const double *rhostart,
70 const double *tau1start,
71 const double *tau2start,
72 const double *component1start,
73 const double *e,
74 const double *f,
75 const double *g,
76 const double *rhostepdata,
77 const double *b0data,
78 const double *B0data,
79 double *logmarglikeholder,
80 double *loglikeholder,
81 const int *chib)
82 {
83 const Matrix <> Y(*Yrow, *Ycol, Ydata);
84 const Matrix <> X(*Xrow, *Xcol, Xdata);
85 const int tot_iter = *burnin + *mcmc;
86 const int nstore = *mcmc / *thin;
87 const int n = Y.rows();
88 const int k = X.cols();
89 const int max_comp = 10;
90 const Matrix <> b0(k, 1, b0data);
91 const Matrix <> B0(k, k, B0data);
92 const Matrix <> B0inv = invpd(B0);
93 Matrix<> wr1(max_comp, 1);
94 Matrix<> mr1(max_comp, 1);
95 Matrix<> sr1(max_comp, 1);
96 Matrix<> wr2(n, max_comp);
97 Matrix<> mr2(n, max_comp);
98 Matrix<> sr2(n, max_comp);
99 Matrix<> nr2(n, 1);
100
101
102 Matrix <> nu(n, 1, nustart);
103 double rho = *rhostart;
104 double step_out = *rhostepdata;
105 Matrix <> beta(k, 1, betastart);
106 Matrix <> tau1(n, 1, tau1start);
107 Matrix <> tau2(n, 1, tau2start);
108 Matrix <> component1(n, 1, component1start);
109 Matrix <> component2(n, 1);
110
111 Matrix<> beta_store(nstore, k);
112 Matrix<int> component1_store(nstore, n);
113 Matrix<int> component2_store(nstore, n);
114 Matrix<> tau1_store(nstore, n);
115 Matrix<> tau2_store(nstore, n);
116 Matrix<> sr1_store(nstore, n);
117 Matrix<> sr2_store(nstore, n);
118 Matrix<> mr1_store(nstore, n);
119 Matrix<> mr2_store(nstore, n);
120 Matrix<> nu_store(nstore, n);
121 Matrix<> rho_store(nstore, 1);
122 Matrix<> rhostep;
123
124 init_aux(stream, Y, wr1, mr1, sr1, wr2, mr2, sr2, nr2, component2);
125 int nplus = 0;
126 for (int t = 0; t < n; t++) {
127 int yt = (int) Y[t];
128 if (yt > 0) {
129 nplus = nplus + 1;
130 }
131 }
132
133 Matrix<> Xplus(nplus, k);
134 int xp_count = 0;
135 for (int t = 0; t < nplus; t++) {
136 int yt = (int) Y[t];
137 Matrix<> xt = X(t, _);
138 if (yt > 0) {
139 Xplus(xp_count, _) = xt;
140 xp_count++;
141 }
142 }
143 //MCMC loop
144 int count = 0;
145 int debug = 0;
146 for (int iter = 0; iter < tot_iter; ++iter){
147 if (debug > 0) {
148 Rprintf("\niter: %i\n-----\n", iter);
149 }
150
151 if (debug > 0) {
152 Rprintf("Sampling rho...\n");
153 }
154
155
156 //////////////////////
157 // 1. Sample rho
158 //////////////////////
159 Matrix<> lambda = exp(X * beta);
160 rhostep = rho_slice_sampler(stream, Y, lambda, rho, step_out, *g, *e, *f);
161 rho = rhostep[0];
162
163
164 if (debug > 0) {
165 Rprintf("Sampling nu...\n");
166 }
167 //////////////////////
168 // 2. Sample nu
169 //////////////////////
170 for (int t = 0; t < n; t++) {
171 int yt = (int) Y[t];
172 nu[t] = stream.rgamma(rho + yt, rho + lambda[t]);
173 }
174
175 if (debug > 0) {
176 Rprintf("Sampling tau...\n");
177 }
178 //////////////////////
179 // 4. Sample tau
180 //////////////////////
181 Matrix<> TAUout;
182 for (int t = 0; t < n; t++) {
183 int yt = (int) Y[t];
184 double mut = exp(log(nu[t]) + log(lambda[t]));
185
186 TAUout = tau_comp_sampler(stream, yt, mut, wr1, mr1, sr1,
187 wr2(t,_), mr2(t,_), sr2(t,_), nr2[t]);
188 tau1[t] = TAUout[0];
189 tau2[t] = TAUout[1];
190 component1[t] = TAUout[2];
191 component2[t] = TAUout[3];
192 }
193
194
195 if (debug > 0) {
196 Rprintf("Sampling beta...\n");
197 }
198 //////////////////////
199 // 2. Sample beta
200 //////////////////////
201 Matrix<> y_tilde(n,1);
202 Matrix<> Sigma_inv_sum(n, 1);
203 Matrix<> yp_tilde(n,1);
204 Matrix<> Sigma_plus_inv_sum(n, 1);
205 Matrix<> sr1_hold(n,1);
206 Matrix<> sr2_hold(n,1);
207 Matrix<> mr1_hold(n,1);
208 Matrix<> mr2_hold(n,1);
209 for (int t = 0; t<n ; ++t) {
210 int yt = (int) Y[t];
211 int comp1 = (int) component1[t];
212 if (yt > 0) {
213 int comp2 = (int) component2[t];
214 sr2_hold[t] = sr2(t, comp2 - 1);
215 mr2_hold[t] = mr2(t, comp2 - 1);
216 Sigma_plus_inv_sum[t] = 1/sqrt(sr2(t, comp2 - 1));
217 yp_tilde[t] = (-log(tau2[t]) - log(nu[t]) - mr2(t, comp2 - 1))/sqrt(sr2(t, comp2-1));
218
219 }
220 sr1_hold[t] = sr1[comp1 - 1];
221 mr1_hold[t] = mr1[comp1 - 1];
222 Sigma_inv_sum[t] = 1/sqrt(sr1[comp1 - 1]);
223 y_tilde[t] = (-log(tau1[t]) - log(nu[t]) - mr1[comp1 - 1])/sqrt(sr1[comp1 - 1]);
224
225 }
226 Matrix<> yjp = rbind(y_tilde, selif(yp_tilde, Y > 0));
227 Matrix<> Xjp = rbind(X, selif(X, Y > 0));
228 Matrix<> wip = rbind(Sigma_inv_sum, selif(Sigma_plus_inv_sum, Y > 0));
229 Matrix<> Xwj(Xjp.rows(), k);
230 for (unsigned int h = 0; h<Xjp.rows(); ++h){
231 Xwj(h, _) = Xjp(h,_)*wip[h];
232 }
233 Matrix<> Bn = invpd(B0 + ::t(Xwj)*Xwj);
234 Matrix<> bn = Bn*gaxpy(B0, b0, ::t(Xwj)*yjp);
235 if (k == 1) {
236 beta[0] = stream.rnorm(bn[0], sqrt(Bn[0]));
237 } else {
238 beta = stream.rmvnorm(bn, Bn);
239 }
240
241 if (debug > 0) {
242 Rprintf("Sampling P...\n");
243 }
244
245
246 if (iter >= *burnin && ((iter % *thin)==0)){
247 beta_store(count,_) = ::t(beta);
248 tau1_store(count,_) = tau1;
249 tau2_store(count,_) = tau2;
250 component1_store(count,_) = component1;
251 component2_store(count,_) = component2;
252 sr1_store(count,_) = sr1_hold;
253 sr2_store(count,_) = sr2_hold;
254 mr1_store(count,_) = mr1_hold;
255 mr2_store(count,_) = mr2_hold;
256 nu_store(count,_) = nu;
257 rho_store(count, _) = rho;
258 ++count;
259 }
260
261 if(*verbose > 0 && iter % *verbose == 0){
262 Rprintf("\n\n MCMCnegbin iteration %i of %i \n", (iter+1), tot_iter);
263 Rprintf("rho is %10.5f\n", rho);
264 for (int j = 0; j<k; ++j){
265 Rprintf("beta(%i) is %10.5f\n", j+1, beta[j]);
266 }
267 }
268
269 }// end MCMC loop
270
271
272
273 //////////////////////////////////////
274 if (*chib == 1) {
275 //////////////////////////////////////
276 if(*verbose > 0){
277 Rprintf("\nCalculating marginal likelihood...\n");
278 }
279 Matrix<> beta_st = ::t(meanc(beta_store));
280 Matrix<> lambda = exp(X * beta_st);
281 double rho_st = exp(mean(log(rho_store)));
282
283 //////////////////////
284 // beta
285 //////////////////////
286 Matrix<> density_beta(nstore, 1);
287 for (int iter = 0; iter<nstore; ++iter){
288 Matrix<> y_tilde(n,1);
289 Matrix<> Sigma_inv_sum(n, 1);
290 Matrix<> yp_tilde(n,1);
291 Matrix<> Sigma_plus_inv_sum(n, 1);
292 for (int t = 0; t<n ; ++t) {
293 int yt = (int) Y[t];
294 int comp1 = (int) component1_store(iter, t);
295 if (yt > 0) {
296 int comp2 = (int) component2_store(iter, t);
297 Sigma_plus_inv_sum[t] = 1/sqrt(sr2(t, comp2 - 1));
298 yp_tilde[t] = (-log(tau2_store(iter,t)) - log(nu_store(iter,t)) - mr2(t, comp2 - 1))/sqrt(sr2(t, comp2-1));
299 }
300 Sigma_inv_sum[t] = 1/sqrt(sr1[comp1 - 1]);
301 y_tilde[t] = (-log(tau1_store(iter,t)) - log(nu_store(iter, t)) - mr1[comp1 - 1])/sqrt(sr1[comp1 - 1]);
302 }
303 Matrix<> yjp = rbind(y_tilde, selif(yp_tilde, Y > 0));
304 Matrix<> Xjp = rbind(X, selif(X, Y > 0));
305 Matrix<> wip = rbind(Sigma_inv_sum, selif(Sigma_plus_inv_sum, Y > 0));
306 Matrix<> Xwj(Xjp.rows(), k);
307 for (unsigned int h = 0; h<Xjp.rows(); ++h){
308 Xwj(h, _) = Xjp(h,_)*wip[h];
309 }
310 Matrix<> Bn = invpd(B0 + ::t(Xwj)*Xwj);
311 Matrix<> bn = Bn*gaxpy(B0, b0, ::t(Xwj)*yjp);
312 if (k == 1) {
313 density_beta[iter] = exp(lndnorm(beta_st[0], bn[0], sqrt(Bn[0])));
314 } else {
315 density_beta[iter] = exp(lndmvn(beta_st, bn, Bn));
316 }
317 }
318 double pdf_beta = log(mean(density_beta));
319
320 //////////////////////
321 // rho/nu
322 //////////////////////
323 Matrix<> density_rho(nstore, 1);
324 for (int iter = 0; iter < nstore; iter++) {
325
326 rhostep = rho_slice_sampler(stream, Y, lambda, rho,
327 step_out, *g, *e, *f);
328 rho = rhostep[0];
329 double L = rhostep[3];
330 double R = rhostep[4];
331
332 // We're going to assume that the distribution is unimodal and we can use the slice width.
333 if (rho_st > L && rho_st < R) {
334 density_rho[iter] = 1/(R-L);
335 } else {
336 density_rho[iter] = 0.0;
337 }
338 }
339
340 double pdf_rho = log(mean(density_rho));
341
342
343 //////////////////////
344 // likelihood
345 //////////////////////
346 double loglike = 0;
347 for (int t = 0; t < n ; ++t){
348 int yt = (int) Y[t];
349 loglike += lngammafn(rho_st + yt) - lngammafn(rho_st) - lngammafn(yt + 1);
350 loglike += rho_st * log(rho_st) + yt*log(lambda[t]) - (rho_st + yt) * log(rho_st + lambda[t]);
351 }
352
353 //////////////////////
354 // prior
355 //////////////////////
356 double density_beta_prior;
357 double density_rho_prior;
358
359 if (k == 1) {
360 density_beta_prior = lndnorm(beta_st[0], b0[0], B0inv[0]);
361 } else {
362 density_beta_prior = lndmvn(beta_st, b0, B0inv);
363 }
364
365 density_rho_prior = (*e - 1) * log(rho_st) - (*e + *f) * log(rho_st + *g);
366
367 // compute marginal likelihood
368 const double logprior = density_beta_prior + density_rho_prior;
369 const double logmarglike = (loglike + logprior) - (pdf_beta + pdf_rho);
370 if (*verbose >0 ){
371 Rprintf("\nlogmarglike = %10.5f\n", logmarglike);
372 Rprintf("loglike = %10.5f\n", loglike);
373 Rprintf("log_prior = %10.5f\n", logprior);
374 Rprintf("log_beta = %10.5f\n", pdf_beta);
375 Rprintf("log_rho = %10.5f\n", pdf_rho);
376 }
377 logmarglikeholder[0] = logmarglike;
378 loglikeholder[0] = loglike;
379
380 }
381
382 R_CheckUserInterrupt();
383
384 for (int i = 0; i<(nstore*k); ++i){
385 betaout[i] = beta_store[i];
386 }
387 for (int i = 0; i<(nstore*n); ++i){
388 nuout[i] = nu_store[i];
389 tau1out[i] = tau1_store[i];
390 tau2out[i] = tau2_store[i];
391 comp1out[i] = component1_store[i];
392 comp2out[i] = component2_store[i];
393 sr1out[i] = sr1_store[i];
394 sr2out[i] = sr2_store[i];
395 mr1out[i] = mr1_store[i];
396 mr2out[i] = mr2_store[i];
397 }
398 for (int i = 0; i<nstore; ++i){
399 rhoout[i] = rho_store[i];
400 }
401 *rhosizes = step_out;
402 }
403
404 extern "C" {
cMCMCnegbin(double * betaout,double * nuout,double * rhoout,double * tau1out,double * tau2out,int * comp1out,int * comp2out,double * sr1out,double * sr2out,double * mr1out,double * mr2out,double * rhosizes,const double * Ydata,const int * Yrow,const int * Ycol,const double * Xdata,const int * Xrow,const int * Xcol,const int * burnin,const int * mcmc,const int * thin,const int * verbose,const double * betastart,const double * nustart,const double * rhostart,const double * tau1start,const double * tau2start,const double * component1start,const double * e,const double * f,const double * g,const double * rhostepdata,const int * uselecuyer,const int * seedarray,const int * lecuyerstream,const double * b0data,const double * B0data,double * logmarglikeholder,double * loglikeholder,const int * chib)405 void cMCMCnegbin(double *betaout,
406 double *nuout,
407 double *rhoout,
408 double *tau1out,
409 double *tau2out,
410 int *comp1out,
411 int *comp2out,
412 double *sr1out,
413 double *sr2out,
414 double *mr1out,
415 double *mr2out,
416 double *rhosizes,
417 const double *Ydata,
418 const int *Yrow,
419 const int *Ycol,
420 const double *Xdata,
421 const int *Xrow,
422 const int *Xcol,
423 const int *burnin,
424 const int *mcmc,
425 const int *thin,
426 const int *verbose,
427 const double *betastart,
428 const double *nustart,
429 const double *rhostart,
430 const double *tau1start,
431 const double *tau2start,
432 const double *component1start,
433 const double *e,
434 const double *f,
435 const double *g,
436 const double *rhostepdata,
437 const int* uselecuyer,
438 const int* seedarray,
439 const int* lecuyerstream,
440 const double *b0data,
441 const double *B0data,
442 double *logmarglikeholder,
443 double *loglikeholder,
444 const int *chib){
445
446 MCMCPACK_PASSRNG2MODEL(MCMCnegbinReg_impl,
447 betaout, nuout, rhoout,
448 tau1out, tau2out, comp1out, comp2out,
449 sr1out, sr2out, mr1out, mr2out,
450 rhosizes,
451 Ydata, Yrow, Ycol,
452 Xdata, Xrow, Xcol,
453 burnin, mcmc, thin, verbose,
454 betastart, nustart, rhostart,
455 tau1start, tau2start, component1start,
456 e, f, g, rhostepdata,
457 b0data, B0data,
458 logmarglikeholder, loglikeholder, chib);
459 }//end MCMC
460 } // end extern "C"
461
462
463 #endif
464