1 //////////////////////////////////////////////////////////////////////////
2 // cMCMCnegbin.cc is C++ code to estimate a  Negative
3 // Binomial regression model
4 //
5 // Matthew Blackwell
6 // Department of Government
7 // Harvard University
8 // mblackwell@gov.harvard.edu
9 
10 // Written 05/19/2017
11 //////////////////////////////////////////////////////////////////////////
12 
13 #ifndef CMCMCNEGBIN_CC
14 #define CMCMCNEGBIN_CC
15 
16 #include<vector>
17 #include<algorithm>
18 
19 #include "MCMCrng.h"
20 #include "MCMCfcds.h"
21 #include "matrix.h"
22 #include "distributions.h"
23 #include "stat.h"
24 #include "la.h"
25 #include "ide.h"
26 #include "smath.h"
27 #include "rng.h"
28 #include "mersenne.h"
29 #include "lecuyer.h"
30 
31 #include "MCMCnbutil.h"
32 
33 #include <R.h>
34 #include <R_ext/Utils.h>
35 
36 using namespace std;
37 using namespace scythe;
38 
39 
40 ////////////////////////////////////////////
41 // MCMCnegbinReg implementation.
42 ////////////////////////////////////////////
43 template <typename RNGTYPE>
MCMCnegbinReg_impl(rng<RNGTYPE> & stream,double * betaout,double * nuout,double * rhoout,double * tau1out,double * tau2out,int * comp1out,int * comp2out,double * sr1out,double * sr2out,double * mr1out,double * mr2out,double * rhosizes,const double * Ydata,const int * Yrow,const int * Ycol,const double * Xdata,const int * Xrow,const int * Xcol,const int * burnin,const int * mcmc,const int * thin,const int * verbose,const double * betastart,const double * nustart,const double * rhostart,const double * tau1start,const double * tau2start,const double * component1start,const double * e,const double * f,const double * g,const double * rhostepdata,const double * b0data,const double * B0data,double * logmarglikeholder,double * loglikeholder,const int * chib)44 void MCMCnegbinReg_impl(rng<RNGTYPE>& stream,
45                         double *betaout,
46                         double *nuout,
47                         double *rhoout,
48                         double *tau1out,
49                         double *tau2out,
50                         int *comp1out,
51                         int *comp2out,
52                         double *sr1out,
53                         double *sr2out,
54                         double *mr1out,
55                         double *mr2out,
56                         double *rhosizes,
57                         const double *Ydata,
58                         const int *Yrow,
59                         const int *Ycol,
60                         const double *Xdata,
61                         const int *Xrow,
62                         const int *Xcol,
63                         const int *burnin,
64                         const int *mcmc,
65                         const int *thin,
66                         const int *verbose,
67                         const double *betastart,
68                         const double *nustart,
69                         const double *rhostart,
70                         const double *tau1start,
71                         const double *tau2start,
72                         const double *component1start,
73                         const double *e,
74                         const double *f,
75                         const double *g,
76                         const double *rhostepdata,
77                         const double *b0data,
78                         const double *B0data,
79                         double *logmarglikeholder,
80                         double *loglikeholder,
81                         const int *chib)
82 {
83   const Matrix <> Y(*Yrow, *Ycol, Ydata);
84   const Matrix <> X(*Xrow, *Xcol, Xdata);
85   const int tot_iter = *burnin + *mcmc;
86   const int nstore = *mcmc / *thin;
87   const int n = Y.rows();
88   const int k = X.cols();
89   const int max_comp = 10;
90   const Matrix <> b0(k, 1, b0data);
91   const Matrix <> B0(k, k, B0data);
92   const Matrix <> B0inv = invpd(B0);
93   Matrix<> wr1(max_comp, 1);
94   Matrix<> mr1(max_comp, 1);
95   Matrix<> sr1(max_comp, 1);
96   Matrix<> wr2(n, max_comp);
97   Matrix<> mr2(n, max_comp);
98   Matrix<> sr2(n, max_comp);
99   Matrix<> nr2(n, 1);
100 
101 
102   Matrix <> nu(n, 1, nustart);
103   double rho = *rhostart;
104   double step_out = *rhostepdata;
105   Matrix <> beta(k, 1, betastart);
106   Matrix <> tau1(n, 1, tau1start);
107   Matrix <> tau2(n, 1, tau2start);
108   Matrix <> component1(n, 1, component1start);
109   Matrix <> component2(n, 1);
110 
111   Matrix<> beta_store(nstore, k);
112   Matrix<int> component1_store(nstore, n);
113   Matrix<int> component2_store(nstore, n);
114   Matrix<> tau1_store(nstore, n);
115   Matrix<> tau2_store(nstore, n);
116   Matrix<> sr1_store(nstore, n);
117   Matrix<> sr2_store(nstore, n);
118   Matrix<> mr1_store(nstore, n);
119   Matrix<> mr2_store(nstore, n);
120   Matrix<> nu_store(nstore, n);
121   Matrix<> rho_store(nstore, 1);
122   Matrix<> rhostep;
123 
124   init_aux(stream, Y, wr1, mr1, sr1, wr2, mr2, sr2, nr2, component2);
125   int nplus = 0;
126   for (int t = 0; t < n; t++) {
127     int yt = (int) Y[t];
128     if (yt > 0) {
129       nplus = nplus + 1;
130     }
131   }
132 
133   Matrix<> Xplus(nplus, k);
134   int xp_count = 0;
135   for (int t = 0; t < nplus; t++) {
136     int yt = (int) Y[t];
137     Matrix<> xt = X(t, _);
138     if (yt > 0) {
139       Xplus(xp_count, _) = xt;
140       xp_count++;
141     }
142   }
143   //MCMC loop
144   int count = 0;
145   int debug = 0;
146   for (int iter = 0; iter < tot_iter; ++iter){
147     if (debug > 0) {
148       Rprintf("\niter: %i\n-----\n", iter);
149     }
150 
151     if (debug > 0) {
152       Rprintf("Sampling rho...\n");
153     }
154 
155 
156     //////////////////////
157     // 1. Sample rho
158     //////////////////////
159     Matrix<> lambda = exp(X * beta);
160     rhostep = rho_slice_sampler(stream, Y, lambda, rho, step_out, *g, *e, *f);
161     rho = rhostep[0];
162 
163 
164     if (debug > 0) {
165       Rprintf("Sampling nu...\n");
166     }
167     //////////////////////
168     // 2. Sample nu
169     //////////////////////
170     for (int t = 0; t < n; t++) {
171       int yt = (int) Y[t];
172       nu[t] = stream.rgamma(rho + yt, rho + lambda[t]);
173     }
174 
175     if (debug > 0) {
176       Rprintf("Sampling tau...\n");
177     }
178     //////////////////////
179     // 4. Sample tau
180     //////////////////////
181     Matrix<> TAUout;
182     for (int t = 0; t < n; t++) {
183       int yt = (int) Y[t];
184       double mut = exp(log(nu[t]) + log(lambda[t]));
185 
186       TAUout = tau_comp_sampler(stream, yt, mut, wr1, mr1, sr1,
187                                 wr2(t,_), mr2(t,_), sr2(t,_), nr2[t]);
188       tau1[t] = TAUout[0];
189       tau2[t] = TAUout[1];
190       component1[t] = TAUout[2];
191       component2[t] = TAUout[3];
192     }
193 
194 
195     if (debug > 0) {
196       Rprintf("Sampling beta...\n");
197     }
198     //////////////////////
199     // 2. Sample beta
200     //////////////////////
201     Matrix<> y_tilde(n,1);
202     Matrix<> Sigma_inv_sum(n, 1);
203     Matrix<> yp_tilde(n,1);
204     Matrix<> Sigma_plus_inv_sum(n, 1);
205     Matrix<> sr1_hold(n,1);
206     Matrix<> sr2_hold(n,1);
207     Matrix<> mr1_hold(n,1);
208     Matrix<> mr2_hold(n,1);
209     for (int t = 0; t<n ; ++t) {
210       int yt = (int) Y[t];
211       int comp1 = (int) component1[t];
212         if (yt > 0) {
213           int comp2 = (int) component2[t];
214           sr2_hold[t] = sr2(t, comp2 - 1);
215           mr2_hold[t] = mr2(t, comp2 - 1);
216           Sigma_plus_inv_sum[t] = 1/sqrt(sr2(t, comp2 - 1));
217           yp_tilde[t] = (-log(tau2[t]) - log(nu[t]) - mr2(t, comp2 - 1))/sqrt(sr2(t, comp2-1));
218 
219         }
220         sr1_hold[t] = sr1[comp1 - 1];
221         mr1_hold[t] = mr1[comp1 - 1];
222         Sigma_inv_sum[t] = 1/sqrt(sr1[comp1 - 1]);
223         y_tilde[t] = (-log(tau1[t]) - log(nu[t])  - mr1[comp1 - 1])/sqrt(sr1[comp1 - 1]);
224 
225     }
226     Matrix<> yjp = rbind(y_tilde, selif(yp_tilde, Y > 0));
227     Matrix<> Xjp = rbind(X, selif(X, Y > 0));
228     Matrix<> wip = rbind(Sigma_inv_sum, selif(Sigma_plus_inv_sum, Y > 0));
229     Matrix<> Xwj(Xjp.rows(), k);
230     for (unsigned int h = 0; h<Xjp.rows(); ++h){
231       Xwj(h, _) = Xjp(h,_)*wip[h];
232     }
233     Matrix<> Bn = invpd(B0 + ::t(Xwj)*Xwj);
234     Matrix<> bn = Bn*gaxpy(B0, b0, ::t(Xwj)*yjp);
235     if (k == 1) {
236       beta[0] = stream.rnorm(bn[0], sqrt(Bn[0]));
237     } else {
238       beta = stream.rmvnorm(bn, Bn);
239     }
240 
241     if (debug > 0) {
242       Rprintf("Sampling P...\n");
243     }
244 
245 
246     if (iter >= *burnin && ((iter % *thin)==0)){
247       beta_store(count,_) = ::t(beta);
248       tau1_store(count,_) = tau1;
249       tau2_store(count,_) = tau2;
250       component1_store(count,_) = component1;
251       component2_store(count,_) = component2;
252       sr1_store(count,_) = sr1_hold;
253       sr2_store(count,_) = sr2_hold;
254       mr1_store(count,_) = mr1_hold;
255       mr2_store(count,_) = mr2_hold;
256       nu_store(count,_) = nu;
257       rho_store(count, _) = rho;
258       ++count;
259     }
260 
261     if(*verbose > 0 && iter % *verbose == 0){
262       Rprintf("\n\n MCMCnegbin iteration %i of %i \n", (iter+1), tot_iter);
263       Rprintf("rho is %10.5f\n", rho);
264       for (int j = 0; j<k; ++j){
265         Rprintf("beta(%i) is %10.5f\n", j+1, beta[j]);
266       }
267     }
268 
269   }// end MCMC loop
270 
271 
272 
273   //////////////////////////////////////
274   if (*chib == 1) {
275   //////////////////////////////////////
276     if(*verbose > 0){
277       Rprintf("\nCalculating marginal likelihood...\n");
278     }
279     Matrix<> beta_st = ::t(meanc(beta_store));
280     Matrix<> lambda = exp(X * beta_st);
281     double rho_st = exp(mean(log(rho_store)));
282 
283     //////////////////////
284     // beta
285     //////////////////////
286     Matrix<> density_beta(nstore, 1);
287     for (int iter = 0; iter<nstore; ++iter){
288       Matrix<> y_tilde(n,1);
289       Matrix<> Sigma_inv_sum(n, 1);
290       Matrix<> yp_tilde(n,1);
291       Matrix<> Sigma_plus_inv_sum(n, 1);
292       for (int t = 0; t<n ; ++t) {
293         int yt = (int) Y[t];
294         int comp1 = (int) component1_store(iter, t);
295         if (yt > 0) {
296           int comp2 = (int) component2_store(iter, t);
297           Sigma_plus_inv_sum[t] = 1/sqrt(sr2(t, comp2 - 1));
298           yp_tilde[t] = (-log(tau2_store(iter,t)) - log(nu_store(iter,t)) - mr2(t, comp2 - 1))/sqrt(sr2(t, comp2-1));
299         }
300         Sigma_inv_sum[t] = 1/sqrt(sr1[comp1 - 1]);
301         y_tilde[t] = (-log(tau1_store(iter,t)) - log(nu_store(iter, t))  - mr1[comp1 - 1])/sqrt(sr1[comp1 - 1]);
302       }
303       Matrix<> yjp = rbind(y_tilde, selif(yp_tilde, Y > 0));
304       Matrix<> Xjp = rbind(X, selif(X, Y > 0));
305       Matrix<> wip = rbind(Sigma_inv_sum, selif(Sigma_plus_inv_sum, Y > 0));
306       Matrix<> Xwj(Xjp.rows(), k);
307       for (unsigned int h = 0; h<Xjp.rows(); ++h){
308         Xwj(h, _) = Xjp(h,_)*wip[h];
309       }
310       Matrix<> Bn = invpd(B0 + ::t(Xwj)*Xwj);
311       Matrix<> bn = Bn*gaxpy(B0, b0, ::t(Xwj)*yjp);
312       if (k == 1) {
313         density_beta[iter] = exp(lndnorm(beta_st[0], bn[0], sqrt(Bn[0])));
314       } else {
315         density_beta[iter] = exp(lndmvn(beta_st, bn, Bn));
316       }
317     }
318     double pdf_beta = log(mean(density_beta));
319 
320     //////////////////////
321     // rho/nu
322     //////////////////////
323     Matrix<> density_rho(nstore, 1);
324     for (int iter = 0; iter < nstore; iter++) {
325 
326       rhostep = rho_slice_sampler(stream, Y, lambda, rho,
327                                   step_out, *g, *e, *f);
328       rho = rhostep[0];
329       double L = rhostep[3];
330       double R = rhostep[4];
331 
332       // We're going to assume that the distribution is unimodal and we can use the slice width.
333       if (rho_st > L && rho_st < R) {
334         density_rho[iter] = 1/(R-L);
335       } else {
336         density_rho[iter] = 0.0;
337       }
338     }
339 
340     double pdf_rho = log(mean(density_rho));
341 
342 
343     //////////////////////
344     // likelihood
345     //////////////////////
346     double loglike = 0;
347     for (int t = 0; t < n ; ++t){
348       int yt = (int) Y[t];
349       loglike += lngammafn(rho_st + yt) - lngammafn(rho_st) - lngammafn(yt + 1);
350       loglike += rho_st * log(rho_st) + yt*log(lambda[t]) - (rho_st + yt) * log(rho_st + lambda[t]);
351     }
352 
353     //////////////////////
354     // prior
355     //////////////////////
356     double density_beta_prior;
357     double density_rho_prior;
358 
359     if (k == 1) {
360       density_beta_prior = lndnorm(beta_st[0], b0[0], B0inv[0]);
361     } else {
362       density_beta_prior = lndmvn(beta_st, b0, B0inv);
363     }
364 
365     density_rho_prior = (*e - 1) * log(rho_st) - (*e + *f) * log(rho_st + *g);
366 
367     // compute marginal likelihood
368     const double logprior = density_beta_prior + density_rho_prior;
369     const double logmarglike = (loglike + logprior) - (pdf_beta + pdf_rho);
370     if (*verbose >0 ){
371       Rprintf("\nlogmarglike = %10.5f\n", logmarglike);
372       Rprintf("loglike = %10.5f\n", loglike);
373       Rprintf("log_prior = %10.5f\n", logprior);
374       Rprintf("log_beta = %10.5f\n", pdf_beta);
375       Rprintf("log_rho = %10.5f\n", pdf_rho);
376     }
377     logmarglikeholder[0] = logmarglike;
378     loglikeholder[0] = loglike;
379 
380   }
381 
382   R_CheckUserInterrupt();
383 
384   for (int i = 0; i<(nstore*k); ++i){
385     betaout[i] = beta_store[i];
386   }
387   for (int i = 0; i<(nstore*n); ++i){
388     nuout[i] = nu_store[i];
389     tau1out[i] = tau1_store[i];
390     tau2out[i] = tau2_store[i];
391     comp1out[i] = component1_store[i];
392     comp2out[i] = component2_store[i];
393     sr1out[i] = sr1_store[i];
394     sr2out[i] = sr2_store[i];
395     mr1out[i] = mr1_store[i];
396     mr2out[i] = mr2_store[i];
397   }
398   for (int i = 0; i<nstore; ++i){
399     rhoout[i] = rho_store[i];
400   }
401   *rhosizes = step_out;
402 }
403 
404 extern "C" {
cMCMCnegbin(double * betaout,double * nuout,double * rhoout,double * tau1out,double * tau2out,int * comp1out,int * comp2out,double * sr1out,double * sr2out,double * mr1out,double * mr2out,double * rhosizes,const double * Ydata,const int * Yrow,const int * Ycol,const double * Xdata,const int * Xrow,const int * Xcol,const int * burnin,const int * mcmc,const int * thin,const int * verbose,const double * betastart,const double * nustart,const double * rhostart,const double * tau1start,const double * tau2start,const double * component1start,const double * e,const double * f,const double * g,const double * rhostepdata,const int * uselecuyer,const int * seedarray,const int * lecuyerstream,const double * b0data,const double * B0data,double * logmarglikeholder,double * loglikeholder,const int * chib)405   void cMCMCnegbin(double *betaout,
406                    double *nuout,
407                    double *rhoout,
408                    double *tau1out,
409                    double *tau2out,
410                    int *comp1out,
411                    int *comp2out,
412                    double *sr1out,
413                    double *sr2out,
414                    double *mr1out,
415                    double *mr2out,
416                    double *rhosizes,
417                    const double *Ydata,
418                    const int *Yrow,
419                    const int *Ycol,
420                    const double *Xdata,
421                    const int *Xrow,
422                    const int *Xcol,
423                    const int *burnin,
424                    const int *mcmc,
425                    const int *thin,
426                    const int *verbose,
427                    const double *betastart,
428                    const double *nustart,
429                    const double *rhostart,
430                    const double *tau1start,
431                    const double *tau2start,
432                    const double *component1start,
433                    const double *e,
434                    const double *f,
435                    const double *g,
436                    const double *rhostepdata,
437                    const int* uselecuyer,
438                    const int* seedarray,
439                    const int* lecuyerstream,
440                    const double *b0data,
441                    const double *B0data,
442                    double *logmarglikeholder,
443                    double *loglikeholder,
444                    const int *chib){
445 
446     MCMCPACK_PASSRNG2MODEL(MCMCnegbinReg_impl,
447                            betaout, nuout, rhoout,
448                            tau1out, tau2out, comp1out, comp2out,
449                            sr1out, sr2out, mr1out, mr2out,
450                            rhosizes,
451                            Ydata, Yrow, Ycol,
452                            Xdata, Xrow, Xcol,
453                            burnin, mcmc, thin, verbose,
454                            betastart, nustart, rhostart,
455                            tau1start, tau2start, component1start,
456                            e, f, g, rhostepdata,
457                            b0data, B0data,
458                            logmarglikeholder, loglikeholder, chib);
459   }//end MCMC
460 } // end extern "C"
461 
462 
463 #endif
464