1 /******************************************************************
2 This file is a part of eco: R Package for Estimating Fitting
3 Bayesian Models of Ecological Inference for 2X2 tables
4 by Kosuke Imai, Ying Lu, and Aaron Strauss
5 Copyright: GPL version 2 or later.
6 *******************************************************************/
7
8 #include <stddef.h>
9 #include <stdio.h>
10 #include <math.h>
11 #include <Rmath.h>
12 #include <R.h>
13 #include <Rinternals.h>
14 #include <R_ext/Utils.h>
15 #include <R_ext/PrtUtil.h>
16 #include "vector.h"
17 #include "subroutines.h"
18 #include "rand.h"
19 #include "sample.h"
20 #include "bayes.h"
21 #include "macros.h"
22 #include "fintegrate.h"
23 //#include <gsl/gsl_integration.h>
24
25 /**
26 * Bivariate normal distribution, with parameterization
27 * see: http://mathworld.wolfram.com/BivariateNormalDistribution.html
28 * see for param: http://www.math.uconn.edu/~binns/reviewII210.pdf
29 */
NormConstT(double * t,int n,void * param)30 void NormConstT(double *t, int n, void *param)
31 {
32 int ii;
33 int dim=2;
34 double *mu=doubleArray(dim);
35 double **Sigma=doubleMatrix(dim,dim);
36 double *W1,*W1p,*W2,*W2p;
37 double X, Y, rho;
38 double dtemp, inp, pfact;
39 int imposs;
40
41 W1 = doubleArray(n);
42 W1p = doubleArray(n);
43 W2 = doubleArray(n);
44 W2p = doubleArray(n);
45
46 Param *pp=(Param *)param;
47 mu[0]= pp->caseP.mu[0];
48 mu[1]= pp->caseP.mu[1];
49 Sigma[0][0]=pp->setP->Sigma[0][0];
50 Sigma[1][1]=pp->setP->Sigma[1][1];
51 Sigma[0][1]=pp->setP->Sigma[0][1];
52 Sigma[1][0]=pp->setP->Sigma[1][0];
53 rho=Sigma[0][1]/sqrt(Sigma[0][0]*Sigma[1][1]);
54 //Rprintf("TESTING: %4g %4g %4g %4g", pp->caseP.mu[0], pp->caseP.mu[1], pp->setP->Sigma[0][0],pp->setP->Sigma[0][1]);
55 X=pp->caseP.X;
56 Y=pp->caseP.Y;
57 imposs=0;
58
59 dtemp=1/(2*M_PI*sqrt(Sigma[0][0]*Sigma[1][1]*(1-rho*rho)));
60
61 for (ii=0; ii<n; ii++) {
62 imposs=0; inp=t[ii];
63 W1[ii]=getW1starFromT(t[ii],pp,&imposs);
64 if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
65 if (imposs==1) t[ii]=0;
66 else {
67 W1p[ii]=getW1starPrimeFromT(t[ii],pp);
68 W2p[ii]=getW2starPrimeFromT(t[ii],pp);
69 pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
70 t[ii]=exp(-1/(2*(1-rho*rho))*
71 ((W1[ii]-mu[0])*(W1[ii]-mu[0])/Sigma[0][0]+
72 (W2[ii]-mu[1])*(W2[ii]-mu[1])/Sigma[1][1]-
73 2*rho*(W1[ii]-mu[0])*(W2[ii]-mu[1])
74 /sqrt(Sigma[0][0]*Sigma[1][1])))*dtemp*pfact;
75 //if (pp->setP->weirdness)
76 // Rprintf("Normc... %d %d %5g -> %5g %5g => %5g with %5g imposs %d\n", ii, n, inp, W1[ii], W2[ii],t[ii],pfact,imposs);
77 //char ch;
78 //scanf(" %c", &ch );
79 }
80 }
81 Free(W1);
82 Free(W1p);
83 Free(W2);
84 Free(W2p);
85 Free(mu);
86 FreeMatrix(Sigma,dim);
87 }
88
89 /**
90 * Integrand for computing sufficient statistic
91 * Which statistic to estimate depends on param->suff (see macros.h)
92 */
SuffExp(double * t,int n,void * param)93 void SuffExp(double *t, int n, void *param)
94 {
95 int ii,imposs,i,j;
96 sufficient_stat suff;
97 Param *pp=(Param *)param;
98 int dim = (pp->setP->ncar==1) ? 3 : 2;
99 double *mu=doubleArray(dim);
100 double **Sigma=doubleMatrix(dim,dim);
101 double **InvSigma=doubleMatrix(dim,dim);/* inverse covariance matrix*/
102 //double Sigma[dim][dim];
103 //double InvSigma[dim][dim];
104 double *W1,*W1p,*W2,*W2p,*vtemp;
105 double inp,density,pfact,normc;
106
107 vtemp=doubleArray(dim);
108 W1 = doubleArray(n);
109 W1p = doubleArray(n);
110 W2 = doubleArray(n);
111 W2p = doubleArray(n);
112 mu[0]= pp->caseP.mu[0];
113 mu[1]= pp->caseP.mu[1];
114 for(i=0;i<dim;i++) {
115 for(j=0;j<dim;j++) {
116 if (dim==3) {
117 Sigma[i][j]=pp->setP->Sigma3[i][j];
118 InvSigma[i][j]=pp->setP->InvSigma3[i][j];
119 }
120 else {
121 Sigma[i][j]=pp->setP->Sigma[i][j];
122 InvSigma[i][j]=pp->setP->InvSigma[i][j];
123 }
124 }
125 }
126 normc=pp->caseP.normcT;
127 suff=pp->caseP.suff;
128 imposs=0;
129
130 for (ii=0; ii<n; ii++) {
131 imposs=0; inp=t[ii];
132 W1[ii]=getW1starFromT(t[ii],pp,&imposs);
133 if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
134 if (imposs==1) t[ii]=0;
135 else {
136 W1p[ii]=getW1starPrimeFromT(t[ii],pp);
137 W2p[ii]=getW2starPrimeFromT(t[ii],pp);
138 pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
139 vtemp[0] = W1[ii];
140 vtemp[1] = W2[ii];
141 density=dBVNtomo(vtemp, pp, 0,normc);
142 t[ii] = density*pfact;
143 if (suff==SS_W1star) t[ii]=W1[ii]*t[ii];
144 else if (suff==SS_W2star) t[ii]=W2[ii]*t[ii];
145 else if (suff==SS_W1star2) t[ii]=W1[ii]*W1[ii]*t[ii];
146 else if (suff==SS_W1W2star) t[ii]=W1[ii]*W2[ii]*t[ii];
147 else if (suff==SS_W2star2) t[ii]=W2[ii]*W2[ii]*t[ii];
148 else if (suff==SS_W1) t[ii]=invLogit(W1[ii])*t[ii];
149 else if (suff==SS_W2) t[ii]=invLogit(W2[ii])*t[ii];
150 else if (suff==SS_Loglik) {
151 if (dim == 3) {
152 //if(pp->setP->verbose>=2 && dim==3) Rprintf("InvSigma loglik: %5g %5g %5g %5g %5g %5g\n",InvSigma[0][0],InvSigma[0][1],InvSigma[1][0],InvSigma[1][1],InvSigma[1][2],InvSigma[2][2]);
153 vtemp[2]=logit(pp->caseP.X,"log-likelihood");
154 mu[0]=pp->setP->pdTheta[1];
155 mu[1]=pp->setP->pdTheta[2];
156 mu[2]=pp->setP->pdTheta[0];
157 }
158 t[ii]=dMVN(vtemp,mu,InvSigma,dim,0)*pfact;
159 //t[ii]=dMVN3(vtemp,mu,(double*)(&(InvSigma[0][0])),dim,0)*pfact;
160 }
161 else if (suff!=SS_Test) Rprintf("Error Suff= %d",suff);
162 }
163 }
164 Free(W1);Free(W1p);Free(W2);Free(W2p);Free(mu);Free(vtemp);
165 FreeMatrix(Sigma,dim); FreeMatrix(InvSigma,dim);
166 }
167
168
169 /**
170 * Returns the log likelihood of a particular case (i.e, record, datapoint)
171 */
getLogLikelihood(Param * param)172 double getLogLikelihood(Param* param) {
173 if (param->caseP.dataType==DPT_General && !(param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
174 //non-survey data: do integration to find likelihood
175 param->caseP.suff=SS_Loglik;
176 return log(paramIntegration(&SuffExp,(void*)param));
177
178
179 } else if (param->caseP.dataType==DPT_Homog_X1 || param->caseP.dataType==DPT_Homog_X0) {
180 //Homogenenous data: just do normal likelihood on one dimension
181 double lik,sigma2,val,mu;
182 val = (param->caseP.dataType==DPT_Homog_X1) ? param->caseP.Wstar[0] : param->caseP.Wstar[1];
183 if (!param->setP->ncar) {
184 mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[0] : param->setP->pdTheta[1];
185 sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[2] : param->setP->pdTheta[3];
186 } else {
187 mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[1] : param->setP->pdTheta[2];
188 sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[4] : param->setP->pdTheta[5];
189 }
190 lik=(1/(sqrt(2*M_PI*sigma2)))*exp(-(.5/sigma2)*(val - mu)*(val - mu));
191 //return log(lik);
192 return 0; //fix later
193
194 } else if (param->caseP.dataType==DPT_Survey || (param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
195 //Survey data (or v tight bounds): multi-variate normal
196 int dim=param->setP->ncar ? 3 : 2;
197 double *mu=doubleArray(dim);
198 double *vtemp=doubleArray(dim);
199 double **InvSig=doubleMatrix(dim,dim);/* inverse covariance matrix*/
200 int i,j;
201 for(i=0;i<dim;i++) {
202 for(j=0;j<dim;j++) {
203 if (dim==3) {
204 InvSig[i][j]=param->setP->InvSigma3[i][j];
205 }
206 else {
207 InvSig[i][j]=param->setP->InvSigma[i][j];
208 }
209 }
210 }
211 double loglik;
212 vtemp[0] = param->caseP.Wstar[0];
213 vtemp[1] = param->caseP.Wstar[1];
214 mu[0]= param->caseP.mu[0];
215 mu[1]= param->caseP.mu[1];
216 if (param->setP->ncar) {
217 vtemp[2]=logit(param->caseP.X,"log-likelihood survey");
218 mu[0]=param->setP->pdTheta[1];
219 mu[1]=param->setP->pdTheta[2];
220 mu[2]=param->setP->pdTheta[0];
221 loglik=dMVN(vtemp,mu,InvSig,dim,1);
222 }
223 else {
224 loglik=dMVN(vtemp,mu,InvSig,dim,1);
225 }
226 Free(mu); Free(vtemp); FreeMatrix(InvSig,dim);
227 return loglik;
228 }
229 else { //Unknown type
230 Rprintf("Error; unkown type: %d\n",param->caseP.dataType);
231 return 0;
232 }
233 }
234
235 /**
236 **********
237 * Line integral helper function
238 **********
239 */
240
241 /**
242 * Returns W2star from W1star, given the following equalities
243 * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
244 * mutation: imposs is set to 1 if the equation cannot be satisfied
245 */
getW2starFromW1star(double X,double Y,double W1star,int * imposs)246 double getW2starFromW1star(double X, double Y, double W1star, int* imposs) {
247 double W1;
248 if (W1star>30) W1=1; //prevent overflow or underflow
249 else W1=1/(1+exp(-1*W1star));
250 double W2=Y/(1-X)-X*W1/(1-X);
251
252 if(W2>=1 || W2<=0) *imposs=1; //impossible pair of values
253 else W2=log(W2/(1-W2));
254 return W2;
255 }
256
257 /**
258 * Returns W1star from W2star, given the following equalities
259 * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
260 * mutation: imposs is set to 1 if the equation cannot be satisfied
261 */
getW1starFromW2star(double X,double Y,double W2star,int * imposs)262 double getW1starFromW2star(double X, double Y, double W2star, int* imposs) {
263 double W2;
264 if (W2star>30) W2=1; //prevent overflow or underflow
265 else W2=1/(1+exp(-1*W2star));
266 double W1=(Y-(1-X)*W2)/X;
267
268 if(W1>=1 || W1<=0) *imposs=1; //impossible pair of values
269 else W1=log(W1/(1-W1));
270 return W1;
271 }
272
273 /**
274 * Returns W1 from W2, X, and Y given
275 * Y=XW1 + (1-X)W2
276 */
getW1FromW2(double X,double Y,double W2)277 double getW1FromW2(double X, double Y, double W2) {
278 return (Y-(1-X)*W2)/X;
279 }
280
281
282 /**
283 * W1star(t)
284 * W1(t)=(W1_ub - W1_lb)*t + W1_lb
285 * mutates impossible to true if W1 is non-finite at t
286 */
getW1starFromT(double t,Param * param,int * imposs)287 double getW1starFromT(double t, Param* param, int* imposs) {
288 double W1=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0])*t + param->caseP.Wbounds[0][0];
289 if (W1==1 || W1==0) *imposs=1;
290 else W1=log(W1/(1-W1));
291 return W1;
292 }
293
294 /**
295 * W2star(t)
296 * W2(t)=(W2_lb - W2_ub)*t + W2_lb
297 */
getW2starFromT(double t,Param * param,int * imposs)298 double getW2starFromT(double t, Param* param, int* imposs) {
299 double W2=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1])*t + param->caseP.Wbounds[1][1];
300 if (W2==1 || W2==0) *imposs=1;
301 else W2=log(W2/(1-W2));
302 return W2;
303 }
304
305 /**
306 * W1star'(t)
307 * see paper for derivation: W1*(t) = (1/W1)*((w1_ub - w1_lb)/(1-W1)
308 */
getW1starPrimeFromT(double t,Param * param)309 double getW1starPrimeFromT(double t, Param* param) {
310 double m=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0]);
311 double W1=m*t + param->caseP.Wbounds[0][0];
312 W1=(1/W1)*(m/(1-W1));
313 return W1;
314 }
315
316 /**
317 * W2star'(t)
318 * see paper for derivation: W2*(t) = (1/W2)*((w2_lb - w2_ub)/(1-W2)
319 */
getW2starPrimeFromT(double t,Param * param)320 double getW2starPrimeFromT(double t, Param* param) {
321 double m=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1]);
322 double W2=m*t + param->caseP.Wbounds[1][1];
323 W2=(1/W2)*(m/(1-W2));
324 return W2;
325 }
326
327 /**
328 * parameterized line integration
329 * lower bound is t=0, upper bound is t=1
330 */
paramIntegration(integr_fn f,void * ex)331 double paramIntegration(integr_fn f, void *ex) {
332 double epsabs=pow(10,-11), epsrel=pow(10,-11);
333 double result=9999, anserr=9999;
334 int limit=100;
335 int last, neval, ier;
336 int lenw=5*limit;
337 int *iwork=(int *) Calloc(limit, int);
338 double *work=(double *)Calloc(lenw, double);
339 double lb=0.00001; double ub=.99999;
340 Rdqags(f, ex, &lb, &ub, &epsabs, &epsrel, &result,
341 &anserr, &neval, &ier, &limit, &lenw, &last, iwork, work);
342
343 Free(iwork);
344 Free(work);
345 if (ier==0) return result;
346 else {
347 Param* p = (Param*) ex;
348 Rprintf("Integration error %d: Sf %d X %5g Y %5g [%5g,%5g] -> %5g +- %5g\n",ier,p->caseP.suff,p->caseP.X,p->caseP.Y,p->caseP.Wbounds[0][0],p->caseP.Wbounds[0][1],result,anserr);
349 char ch;
350 scanf("Hit enter to continue %c", &ch );
351 return result;
352 }
353
354 }
355
356 /**
357 * integrate normalizing constant and set it in param
358 */
setNormConst(Param * param)359 void setNormConst(Param* param) {
360 param->caseP.normcT=paramIntegration(&NormConstT,(void*)param);
361 }
362
363
364 /**
365 * Set the bounds on W1 and W2 in their parameter
366 */
setBounds(Param * param)367 void setBounds(Param* param) {
368 double X,Y,w1_lb,w1_ub,w2_lb,w2_ub;
369 //int w1_inf,w2_inf;
370 double tol0=0.0001;
371 double tol1=0.9999;
372 X=param->caseP.X;
373 Y=param->caseP.Y;
374
375 //find bounds for W1
376 w1_ub=(Y-(1-X)*0)/X; //W2=0
377 if (w1_ub>tol1) w1_ub=1;
378 w1_lb=(Y-(1-X)*1)/X; //W2=1
379 if (w1_lb<tol0) w1_lb=0;
380
381 //find bounds for W2
382 w2_ub=Y/(1-X)-X*0/(1-X); //W1=0
383 if (w2_ub>tol1) w2_ub=1;
384 w2_lb=Y/(1-X)-X*1/(1-X); //W1=1
385 if (w2_lb<tol0) w2_lb=0;
386
387
388 /*
389 if (w1_lb==0 && w1_ub==1) w1_inf=2;
390 else if (w1_lb==0) w1_inf=-1;
391 else if (w1_ub==1) w1_inf=1;
392 else w1_inf=0;
393 //w1_lb=log(w1_lb/(1-w1_lb));
394 //w1_ub=log(w1_ub/(1-w1_ub));
395
396 if (w2_lb==0 && w2_ub==1) w2_inf=2;
397 else if (w2_lb==0) w2_inf=-1;
398 else if (w2_ub==1) w2_inf=1;
399 else w2_inf=0;
400 //w2_lb=log(w2_lb/(1-w2_lb));
401 //w2_ub=log(w2_ub/(1-w2_ub));
402 */
403 param->caseP.Wbounds[0][0]=w1_lb;
404 param->caseP.Wbounds[0][1]=w1_ub;
405 param->caseP.Wbounds[1][0]=w2_lb;
406 param->caseP.Wbounds[1][1]=w2_ub;
407 //param->W1_inf=w1_inf;
408 //param->W2_inf=w2_inf;
409
410 }
411