1 /******************************************************************
2   This file is a part of eco: R Package for Estimating Fitting
3   Bayesian Models of Ecological Inference for 2X2 tables
4   by Kosuke Imai, Ying Lu, and Aaron Strauss
5   Copyright: GPL version 2 or later.
6 *******************************************************************/
7 
8 #include <stddef.h>
9 #include <stdio.h>
10 #include <math.h>
11 #include <Rmath.h>
12 #include <R.h>
13 #include <Rinternals.h>
14 #include <R_ext/Utils.h>
15 #include <R_ext/PrtUtil.h>
16 #include "vector.h"
17 #include "subroutines.h"
18 #include "rand.h"
19 #include "sample.h"
20 #include "bayes.h"
21 #include "macros.h"
22 #include "fintegrate.h"
23 //#include  <gsl/gsl_integration.h>
24 
25 /**
26  * Bivariate normal distribution, with parameterization
27  * see: http://mathworld.wolfram.com/BivariateNormalDistribution.html
28  * see for param: http://www.math.uconn.edu/~binns/reviewII210.pdf
29  */
NormConstT(double * t,int n,void * param)30 void NormConstT(double *t, int n, void *param)
31 {
32   int ii;
33   int dim=2;
34   double *mu=doubleArray(dim);
35   double **Sigma=doubleMatrix(dim,dim);
36   double *W1,*W1p,*W2,*W2p;
37   double X, Y, rho;
38   double dtemp, inp, pfact;
39   int imposs;
40 
41   W1 = doubleArray(n);
42   W1p = doubleArray(n);
43   W2 = doubleArray(n);
44   W2p = doubleArray(n);
45 
46   Param *pp=(Param *)param;
47   mu[0]= pp->caseP.mu[0];
48   mu[1]= pp->caseP.mu[1];
49   Sigma[0][0]=pp->setP->Sigma[0][0];
50   Sigma[1][1]=pp->setP->Sigma[1][1];
51   Sigma[0][1]=pp->setP->Sigma[0][1];
52   Sigma[1][0]=pp->setP->Sigma[1][0];
53   rho=Sigma[0][1]/sqrt(Sigma[0][0]*Sigma[1][1]);
54   //Rprintf("TESTING: %4g %4g %4g %4g", pp->caseP.mu[0], pp->caseP.mu[1], pp->setP->Sigma[0][0],pp->setP->Sigma[0][1]);
55   X=pp->caseP.X;
56   Y=pp->caseP.Y;
57   imposs=0;
58 
59   dtemp=1/(2*M_PI*sqrt(Sigma[0][0]*Sigma[1][1]*(1-rho*rho)));
60 
61   for (ii=0; ii<n; ii++) {
62     imposs=0; inp=t[ii];
63     W1[ii]=getW1starFromT(t[ii],pp,&imposs);
64     if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
65     if (imposs==1) t[ii]=0;
66     else {
67         W1p[ii]=getW1starPrimeFromT(t[ii],pp);
68         W2p[ii]=getW2starPrimeFromT(t[ii],pp);
69         pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
70         t[ii]=exp(-1/(2*(1-rho*rho))*
71               ((W1[ii]-mu[0])*(W1[ii]-mu[0])/Sigma[0][0]+
72                (W2[ii]-mu[1])*(W2[ii]-mu[1])/Sigma[1][1]-
73                 2*rho*(W1[ii]-mu[0])*(W2[ii]-mu[1])
74               /sqrt(Sigma[0][0]*Sigma[1][1])))*dtemp*pfact;
75   //if (pp->setP->weirdness)
76    //   Rprintf("Normc... %d %d %5g -> %5g %5g => %5g with %5g imposs %d\n", ii, n, inp, W1[ii], W2[ii],t[ii],pfact,imposs);
77       //char ch;
78       //scanf(" %c", &ch );
79     }
80   }
81   Free(W1);
82   Free(W1p);
83   Free(W2);
84   Free(W2p);
85   Free(mu);
86   FreeMatrix(Sigma,dim);
87 }
88 
89 /**
90  * Integrand for computing sufficient statistic
91  * Which statistic to estimate depends on param->suff (see macros.h)
92  */
SuffExp(double * t,int n,void * param)93 void SuffExp(double *t, int n, void *param)
94 {
95   int ii,imposs,i,j;
96   sufficient_stat suff;
97   Param *pp=(Param *)param;
98   int dim = (pp->setP->ncar==1) ? 3 : 2;
99   double *mu=doubleArray(dim);
100   double **Sigma=doubleMatrix(dim,dim);
101   double **InvSigma=doubleMatrix(dim,dim);/* inverse covariance matrix*/
102   //double Sigma[dim][dim];
103   //double InvSigma[dim][dim];
104   double *W1,*W1p,*W2,*W2p,*vtemp;
105   double inp,density,pfact,normc;
106 
107   vtemp=doubleArray(dim);
108   W1 = doubleArray(n);
109   W1p = doubleArray(n);
110   W2 = doubleArray(n);
111   W2p = doubleArray(n);
112   mu[0]= pp->caseP.mu[0];
113   mu[1]= pp->caseP.mu[1];
114   for(i=0;i<dim;i++) {
115     for(j=0;j<dim;j++) {
116       if (dim==3) {
117         Sigma[i][j]=pp->setP->Sigma3[i][j];
118         InvSigma[i][j]=pp->setP->InvSigma3[i][j];
119       }
120       else {
121         Sigma[i][j]=pp->setP->Sigma[i][j];
122         InvSigma[i][j]=pp->setP->InvSigma[i][j];
123       }
124     }
125   }
126   normc=pp->caseP.normcT;
127   suff=pp->caseP.suff;
128   imposs=0;
129 
130   for (ii=0; ii<n; ii++) {
131     imposs=0; inp=t[ii];
132     W1[ii]=getW1starFromT(t[ii],pp,&imposs);
133     if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
134     if (imposs==1) t[ii]=0;
135     else {
136       W1p[ii]=getW1starPrimeFromT(t[ii],pp);
137       W2p[ii]=getW2starPrimeFromT(t[ii],pp);
138       pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
139       vtemp[0] = W1[ii];
140       vtemp[1] = W2[ii];
141       density=dBVNtomo(vtemp, pp, 0,normc);
142       t[ii] = density*pfact;
143       if (suff==SS_W1star) t[ii]=W1[ii]*t[ii];
144       else if (suff==SS_W2star) t[ii]=W2[ii]*t[ii];
145       else if (suff==SS_W1star2) t[ii]=W1[ii]*W1[ii]*t[ii];
146       else if (suff==SS_W1W2star) t[ii]=W1[ii]*W2[ii]*t[ii];
147       else if (suff==SS_W2star2) t[ii]=W2[ii]*W2[ii]*t[ii];
148       else if (suff==SS_W1) t[ii]=invLogit(W1[ii])*t[ii];
149       else if (suff==SS_W2) t[ii]=invLogit(W2[ii])*t[ii];
150       else if (suff==SS_Loglik) {
151         if (dim == 3) {
152           //if(pp->setP->verbose>=2 && dim==3) Rprintf("InvSigma loglik: %5g %5g %5g %5g %5g %5g\n",InvSigma[0][0],InvSigma[0][1],InvSigma[1][0],InvSigma[1][1],InvSigma[1][2],InvSigma[2][2]);
153           vtemp[2]=logit(pp->caseP.X,"log-likelihood");
154           mu[0]=pp->setP->pdTheta[1];
155           mu[1]=pp->setP->pdTheta[2];
156           mu[2]=pp->setP->pdTheta[0];
157         }
158         t[ii]=dMVN(vtemp,mu,InvSigma,dim,0)*pfact;
159         //t[ii]=dMVN3(vtemp,mu,(double*)(&(InvSigma[0][0])),dim,0)*pfact;
160       }
161       else if (suff!=SS_Test) Rprintf("Error Suff= %d",suff);
162     }
163   }
164   Free(W1);Free(W1p);Free(W2);Free(W2p);Free(mu);Free(vtemp);
165   FreeMatrix(Sigma,dim); FreeMatrix(InvSigma,dim);
166 }
167 
168 
169 /**
170  * Returns the log likelihood of a particular case (i.e, record, datapoint)
171  */
getLogLikelihood(Param * param)172 double getLogLikelihood(Param* param) {
173   if (param->caseP.dataType==DPT_General  && !(param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
174     //non-survey data: do integration to find likelihood
175     param->caseP.suff=SS_Loglik;
176     return log(paramIntegration(&SuffExp,(void*)param));
177 
178 
179   } else if (param->caseP.dataType==DPT_Homog_X1 || param->caseP.dataType==DPT_Homog_X0) {
180       //Homogenenous data: just do normal likelihood on one dimension
181       double lik,sigma2,val,mu;
182       val = (param->caseP.dataType==DPT_Homog_X1) ? param->caseP.Wstar[0] : param->caseP.Wstar[1];
183       if (!param->setP->ncar) {
184         mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[0] : param->setP->pdTheta[1];
185         sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[2] : param->setP->pdTheta[3];
186       } else {
187         mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[1] : param->setP->pdTheta[2];
188         sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[4] : param->setP->pdTheta[5];
189       }
190       lik=(1/(sqrt(2*M_PI*sigma2)))*exp(-(.5/sigma2)*(val - mu)*(val - mu));
191       //return log(lik);
192       return 0; //fix later
193 
194   } else if (param->caseP.dataType==DPT_Survey || (param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
195     //Survey data (or v tight bounds): multi-variate normal
196     int dim=param->setP->ncar ? 3 : 2;
197     double *mu=doubleArray(dim);
198     double *vtemp=doubleArray(dim);
199     double **InvSig=doubleMatrix(dim,dim);/* inverse covariance matrix*/
200     int i,j;
201     for(i=0;i<dim;i++) {
202       for(j=0;j<dim;j++) {
203         if (dim==3) {
204           InvSig[i][j]=param->setP->InvSigma3[i][j];
205         }
206         else {
207           InvSig[i][j]=param->setP->InvSigma[i][j];
208         }
209       }
210     }
211     double loglik;
212     vtemp[0] = param->caseP.Wstar[0];
213     vtemp[1] = param->caseP.Wstar[1];
214     mu[0]= param->caseP.mu[0];
215     mu[1]= param->caseP.mu[1];
216     if (param->setP->ncar) {
217       vtemp[2]=logit(param->caseP.X,"log-likelihood survey");
218       mu[0]=param->setP->pdTheta[1];
219       mu[1]=param->setP->pdTheta[2];
220       mu[2]=param->setP->pdTheta[0];
221       loglik=dMVN(vtemp,mu,InvSig,dim,1);
222     }
223     else {
224       loglik=dMVN(vtemp,mu,InvSig,dim,1);
225     }
226     Free(mu); Free(vtemp); FreeMatrix(InvSig,dim);
227     return loglik;
228   }
229   else { //Unknown type
230     Rprintf("Error; unkown type: %d\n",param->caseP.dataType);
231     return 0;
232   }
233 }
234 
235 /**
236  **********
237  * Line integral helper function
238  **********
239  */
240 
241 /**
242  * Returns W2star from W1star, given the following equalities
243  * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
244  * mutation: imposs is set to 1 if the equation cannot be satisfied
245  */
getW2starFromW1star(double X,double Y,double W1star,int * imposs)246 double getW2starFromW1star(double X, double Y, double W1star, int* imposs) {
247   double W1;
248   if (W1star>30) W1=1; //prevent overflow or underflow
249   else W1=1/(1+exp(-1*W1star));
250   double W2=Y/(1-X)-X*W1/(1-X);
251 
252   if(W2>=1 || W2<=0) *imposs=1; //impossible pair of values
253   else W2=log(W2/(1-W2));
254   return W2;
255 }
256 
257 /**
258  * Returns W1star from W2star, given the following equalities
259  * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
260  * mutation: imposs is set to 1 if the equation cannot be satisfied
261  */
getW1starFromW2star(double X,double Y,double W2star,int * imposs)262 double getW1starFromW2star(double X, double Y, double W2star, int* imposs) {
263   double W2;
264   if (W2star>30) W2=1; //prevent overflow or underflow
265   else W2=1/(1+exp(-1*W2star));
266   double W1=(Y-(1-X)*W2)/X;
267 
268   if(W1>=1 || W1<=0) *imposs=1; //impossible pair of values
269   else W1=log(W1/(1-W1));
270   return W1;
271 }
272 
273 /**
274  * Returns W1 from W2, X, and Y given
275  * Y=XW1 + (1-X)W2
276  */
getW1FromW2(double X,double Y,double W2)277 double getW1FromW2(double X, double Y, double W2) {
278   return (Y-(1-X)*W2)/X;
279 }
280 
281 
282  /**
283  * W1star(t)
284  * W1(t)=(W1_ub - W1_lb)*t + W1_lb
285  * mutates impossible to true if W1 is non-finite at t
286  */
getW1starFromT(double t,Param * param,int * imposs)287 double getW1starFromT(double t, Param* param, int* imposs) {
288   double W1=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0])*t + param->caseP.Wbounds[0][0];
289   if (W1==1 || W1==0) *imposs=1;
290   else W1=log(W1/(1-W1));
291   return W1;
292 }
293 
294 /**
295  * W2star(t)
296  * W2(t)=(W2_lb - W2_ub)*t + W2_lb
297  */
getW2starFromT(double t,Param * param,int * imposs)298 double getW2starFromT(double t, Param* param, int* imposs) {
299   double W2=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1])*t + param->caseP.Wbounds[1][1];
300   if (W2==1 || W2==0) *imposs=1;
301   else W2=log(W2/(1-W2));
302   return W2;
303 }
304 
305 /**
306  * W1star'(t)
307  * see paper for derivation: W1*(t) = (1/W1)*((w1_ub - w1_lb)/(1-W1)
308  */
getW1starPrimeFromT(double t,Param * param)309 double getW1starPrimeFromT(double t, Param* param) {
310   double m=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0]);
311   double W1=m*t + param->caseP.Wbounds[0][0];
312   W1=(1/W1)*(m/(1-W1));
313   return W1;
314 }
315 
316 /**
317  * W2star'(t)
318  * see paper for derivation: W2*(t) = (1/W2)*((w2_lb - w2_ub)/(1-W2)
319  */
getW2starPrimeFromT(double t,Param * param)320 double getW2starPrimeFromT(double t, Param* param) {
321   double m=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1]);
322   double W2=m*t + param->caseP.Wbounds[1][1];
323   W2=(1/W2)*(m/(1-W2));
324   return W2;
325 }
326 
327 /**
328  * parameterized line integration
329  * lower bound is t=0, upper bound is t=1
330  */
paramIntegration(integr_fn f,void * ex)331 double paramIntegration(integr_fn f, void *ex) {
332   double epsabs=pow(10,-11), epsrel=pow(10,-11);
333   double result=9999, anserr=9999;
334   int limit=100;
335   int last, neval, ier;
336   int lenw=5*limit;
337   int *iwork=(int *) Calloc(limit, int);
338   double *work=(double *)Calloc(lenw, double);
339   double lb=0.00001; double ub=.99999;
340   Rdqags(f, ex, &lb, &ub, &epsabs, &epsrel, &result,
341     &anserr, &neval, &ier, &limit, &lenw, &last, iwork, work);
342 
343   Free(iwork);
344   Free(work);
345   if (ier==0) return result;
346   else {
347     Param* p = (Param*) ex;
348     Rprintf("Integration error %d: Sf %d X %5g Y %5g [%5g,%5g] -> %5g +- %5g\n",ier,p->caseP.suff,p->caseP.X,p->caseP.Y,p->caseP.Wbounds[0][0],p->caseP.Wbounds[0][1],result,anserr);
349     char ch;
350     scanf("Hit enter to continue %c", &ch );
351     return result;
352   }
353 
354 }
355 
356 /**
357  * integrate normalizing constant and set it in param
358  */
setNormConst(Param * param)359 void setNormConst(Param* param) {
360   param->caseP.normcT=paramIntegration(&NormConstT,(void*)param);
361 }
362 
363 
364 /**
365  * Set the bounds on W1 and W2 in their parameter
366  */
setBounds(Param * param)367 void setBounds(Param* param) {
368   double X,Y,w1_lb,w1_ub,w2_lb,w2_ub;
369   //int w1_inf,w2_inf;
370   double tol0=0.0001;
371   double tol1=0.9999;
372   X=param->caseP.X;
373   Y=param->caseP.Y;
374 
375   //find bounds for W1
376   w1_ub=(Y-(1-X)*0)/X; //W2=0
377   if (w1_ub>tol1) w1_ub=1;
378   w1_lb=(Y-(1-X)*1)/X; //W2=1
379   if (w1_lb<tol0) w1_lb=0;
380 
381   //find bounds for W2
382   w2_ub=Y/(1-X)-X*0/(1-X); //W1=0
383   if (w2_ub>tol1) w2_ub=1;
384   w2_lb=Y/(1-X)-X*1/(1-X); //W1=1
385   if (w2_lb<tol0) w2_lb=0;
386 
387 
388   /*
389   if (w1_lb==0 && w1_ub==1) w1_inf=2;
390   else if (w1_lb==0) w1_inf=-1;
391   else if (w1_ub==1) w1_inf=1;
392   else w1_inf=0;
393   //w1_lb=log(w1_lb/(1-w1_lb));
394   //w1_ub=log(w1_ub/(1-w1_ub));
395 
396   if (w2_lb==0 && w2_ub==1) w2_inf=2;
397   else if (w2_lb==0) w2_inf=-1;
398   else if (w2_ub==1) w2_inf=1;
399   else w2_inf=0;
400   //w2_lb=log(w2_lb/(1-w2_lb));
401   //w2_ub=log(w2_ub/(1-w2_ub));
402   */
403   param->caseP.Wbounds[0][0]=w1_lb;
404   param->caseP.Wbounds[0][1]=w1_ub;
405   param->caseP.Wbounds[1][0]=w2_lb;
406   param->caseP.Wbounds[1][1]=w2_ub;
407   //param->W1_inf=w1_inf;
408   //param->W2_inf=w2_inf;
409 
410 }
411