1 #include <stddef.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include <R.h>
5 #include <Rmath.h>
6 #include <R_ext/PrtUtil.h>
7 #include "vector.h"
8 #include "subroutines.h"
9 #include "rand.h"
10 #include "sample.h"
11 #include "bayes.h"
12 #include "macros.h"
13 #include "fintegrate.h"
14 
15 
16 void readData(Param* params, int n_dim, double* pdX, double* sur_W, double* x1_W1, double* x0_W2,
17                 int n_samp, int s_samp, int x1_samp, int x0_samp);
18 void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7][7], double Rmat[7][7]);
19 void ecoEStep(Param* params, double* suff);
20 void ecoMStep(double* Suff, double* pdTheta, Param* params);
21 void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params);
22 void ecoMStepCCAR(double* pdTheta, Param* params);
23 void MStepHypTest(Param* params, double* pdTheta);
24 void initTheta(double* pdTheta_in,Param* params, double* pdTheta);
25 void initNCAR(Param* params, double* pdTheta);
26 void setHistory(double* t_pdTheta, double loglik, int iter,setParam* setP,double history_full[][10]);
27 int closeEnough(double* pdTheta, double* pdTheta_old, int len, double maxerr);
28 int semDoneCheck(setParam* setP);
29 void gridEStep(Param* params, int n_samp, int s_samp, int x1_samp, int x0_samp, double* suff, int verbose, double minW1, double maxW1);
30 void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP);
31 void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP);
32 void ncarFixedRhoTransform(double* pdTheta);
33 void ncarFixedRhoUnTransform(double* pdTheta);
34 void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta);
35 
36 /**
37  * Main function.
38  * Important mutations (i.e., outputs): pdTheta, Suff, DMmatrix, history
39  * See internal comments for details.
40  */
41 
cEMeco(double * pdX,double * pdTheta_in,int * pin_samp,int * iteration_max,double * convergence,int * survey,int * sur_samp,double * sur_W,int * x1,int * sampx1,double * x1_W1,int * x0,int * sampx0,double * x0_W2,double * minW1,double * maxW1,int * flag,int * verbosiosity,int * calcLoglik,int * hypTest_L,double * optTheta,double * pdTheta,double * Suff,double * inSample,double * DMmatrix,int * itersUsed,double * history)42 void cEMeco(
43 	    /*data input */
44 	    double *pdX,         /* data (X, Y) */
45 	    double *pdTheta_in,  /* Theta^ t
46 				    CAR: mu1, mu2, var1, var2, rho
47 				    NCAR: mu1, mu2, var1, var2, p13,p13,p12*/
48 	    int *pin_samp,       /* sample size */
49 
50 	    /* loop vairables */
51 	    int *iteration_max,          /* number of maximum iterations */
52 	    double *convergence,          /* abs value limit before stopping */
53 
54 	    /*incorporating survey data */
55 	    int *survey,         /*1 if survey data available(W_1, W_2)
56 				   0 not*/
57 	    int *sur_samp,       /*sample size of survey data*/
58 	    double *sur_W,       /*set of known W_1, W_2 */
59 
60 	    /*incorporating homeogenous areas */
61 	    int *x1,       /* 1 if X=1 type areas available W_1 known,
62 			      W_2 unknown */
63 	    int *sampx1,   /* number X=1 type areas */
64 	    double *x1_W1, /* values of W_1 for X1 type areas */
65 
66 	    int *x0,       /* 1 if X=0 type areas available W_2 known,
67 			      W_1 unknown */
68 	    int *sampx0,   /* number X=0 type areas */
69 	    double *x0_W2, /* values of W_2 for X0 type areas */
70 
71 	    /* bounds of W1 */
72 	    double *minW1, double *maxW1,
73 
74 	    /* options */
75 	    int *flag,    /*0th (rightmost) bit: 1 = NCAR, 0=normal; 1st bit: 1 = fixed rho, 0 = not fixed rho*/
76 	    int *verbosiosity,    /*How much to print out, 0=silent, 1=cycle, 2=data*/
77       int *calcLoglik,    /*How much to print out, 0=silent, 1=cycle, 2=data*/
78 	    int *hypTest_L,   /* number of hypothesis constraints */
79 	    double *optTheta,  /*optimal theta obtained from previous EM result; if set, then we're doing SEM*/
80 
81 	    /* storage */
82       //Theta under CAR: mu1,mu2,s1,s2,p12
83       //Theta under NCAR: mu_3, mu_1, mu_2, sig_3, sig_1, sig_2, r_13, r_23, r_12
84 	    double *pdTheta,  /*EM result for Theta^(t+1) */
85 	    double *Suff,      /*out put suffucient statistics (E(W_1i|Y_i),
86 				E(E_1i*W_1i|Y_i..) when  conveges */
87       double *inSample, /* In Sample info */
88       double *DMmatrix,  /* DM matrix for SEM*/
89       int *itersUsed, /* number of iterations used */
90       double *history /* history of param (transformed) as well as logliklihood*/
91 	    ){
92 
93   int n_samp  = *pin_samp;    /* sample size */
94   int s_samp  = *survey ? *sur_samp : 0;     /* sample size of survey data */
95   int x1_samp = *x1 ? *sampx1 : 0;       /* sample size for X=1 */
96   int x0_samp = *x0 ? *sampx0 : 0;       /* sample size for X=0 */
97   //int t_samp=n_samp+s_samp+x1_samp+x0_samp;  /* total sample size*/
98   int t_samp=n_samp+s_samp;  /* total sample size, ignoring homog data*/
99   int n_dim=2;        /* dimensions */
100 
101   setParam setP;
102   //set options
103   setP.ncar=bit(*flag,0);
104   setP.fixedRho=bit(*flag,1);
105   setP.sem=bit(*flag,2) & (optTheta[2]!=-1.1);
106   setP.ccar=0; setP.ccar_nvar=0;
107 
108   //hard-coded hypothesis test
109   //hypTest is the number of constraints.  hyptTest==0 when we're not checking a hypothesis
110   setP.hypTest=(*hypTest_L);
111   if (setP.hypTest>1) error("Unable to do hypothesis testing with more than one constraint");
112   if (setP.hypTest==1) {
113     setP.hypTestCoeff=doubleMatrix(setP.ncar ? 3 : 2,setP.hypTest);
114     setP.hypTestCoeff[0][0]=1; setP.hypTestCoeff[1][0]=-1;
115     if (setP.ncar) setP.hypTestCoeff[2][0]=0;
116     setP.hypTestResult=0;
117   }
118 
119   setP.verbose=*verbosiosity;
120   if (setP.verbose>=1) Rprintf("OPTIONS::  Ncar: %s; Fixed Rho: %s; SEM: %s\n",setP.ncar==1 ? "Yes" : "No",
121    setP.fixedRho==1 ? "Yes" : "No",setP.sem==1 ? "Second run" : (bit(*flag,2)==1 ? "First run" : "No"));
122   setP.calcLoglik=*calcLoglik;
123   setP.convergence=*convergence;
124   setP.t_samp=t_samp; setP.n_samp=n_samp; setP.s_samp=s_samp; setP.x1_samp=x1_samp; setP.x0_samp=x0_samp;
125   int param_len=setP.ccar ? setP.ccar_nvar : (setP.ncar ? 9 : 5);
126   setP.param_len=param_len;
127   setP.pdTheta=doubleArray(param_len);
128   setP.suffstat_len=(setP.ncar ? 9 : 5);
129   setP.SigmaK=doubleMatrix(param_len,param_len); //CCAR
130   setP.InvSigmaK=doubleMatrix(param_len,param_len); //CCAR
131 
132   /* model parameters */
133   //double **Sigma=doubleMatrix(n_dim,n_dim);/* inverse covariance matrix*/
134   //double **InvSigma=doubleMatrix(n_dim,n_dim);/* inverse covariance matrix*/
135 
136   double *pdTheta_old=doubleArray(param_len);
137   double *t_pdTheta=doubleArray(param_len); //transformed theta
138   double *t_pdTheta_old=doubleArray(param_len);
139   double Rmat_old[7][7];
140   double Rmat[7][7];
141   double history_full[*iteration_max+1][10];
142 
143   /* misc variables */
144   int i, j,main_loop, start;   /* used for various loops */
145 
146   /* get random seed */
147   GetRNGstate();
148 
149   //assign param
150   Param* params=(Param*) R_alloc(t_samp,sizeof(Param));
151 
152   for(i=0;i<t_samp;i++) params[i].setP=&setP;
153   readData(params, n_dim, pdX, sur_W, x1_W1, x0_W2, n_samp, s_samp, x1_samp, x0_samp);
154 
155 
156 
157   /***Begin main loop ***/
158   main_loop=1;start=1;
159   while (main_loop<=*iteration_max && (start==1 ||
160           (setP.sem==0 && !closeEnough(t_pdTheta,t_pdTheta_old,param_len,*convergence)) ||
161           (setP.sem==1 && !semDoneCheck((setParam*)&setP)))) {
162   //while (main_loop<=*iteration_max && (start==1 || !closeEnough(transformTheta(pdTheta),transformTheta(pdTheta_old),param_len,*convergence))) {
163 
164     setP.iter=main_loop;
165     if (start) {
166       initTheta(pdTheta_in,params,pdTheta);
167       transformTheta(pdTheta,t_pdTheta,param_len, &setP);
168       setHistory(t_pdTheta,0,0,(setParam*)&setP,history_full);
169       if (!setP.ncar) {
170         for(i=0;i<t_samp;i++) {
171           params[i].caseP.mu[0] = pdTheta[0];
172           params[i].caseP.mu[1] = pdTheta[1];
173         }
174         setP.Sigma[0][0] = pdTheta[2];
175         setP.Sigma[1][1] = pdTheta[3];
176         setP.Sigma[0][1] = pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]);
177         setP.Sigma[1][0] = setP.Sigma[0][1];
178         dinv2D((double*)&setP.Sigma[0][0], 2, (double*)&setP.InvSigma[0][0], "Start of main loop");
179       }
180       else {
181         if (setP.fixedRho) ncarFixedRhoTransform(pdTheta);
182         initNCAR(params,pdTheta);
183         if (setP.fixedRho) ncarFixedRhoUnTransform(pdTheta);
184       }
185       start=0;
186     }
187     for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
188 
189     if (setP.verbose>=1) {
190       if ((main_loop - 1) % 15 == 0) printColumnHeader(main_loop,*iteration_max,&setP,0);
191 
192       Rprintf("cycle %d/%d:",main_loop,*iteration_max);
193       for(i=0;i<param_len;i++)
194         if (setP.varParam[i]) {
195           if (pdTheta[i]>=0) Rprintf("% 5.3f",pdTheta[i]);
196           else Rprintf(" % 5.2f",pdTheta[i]);
197         }
198       if (setP.calcLoglik==1 && main_loop>2)
199         Rprintf(" Prev LL: %5.2f",Suff[setP.suffstat_len]);
200       Rprintf("\n");
201     }
202     //keep the old theta around for comaprison
203     for(i=0;i<param_len;i++) pdTheta_old[i]=pdTheta[i];
204     transformTheta(pdTheta_old,t_pdTheta_old,param_len,&setP);
205 
206 
207     ecoEStep(params, Suff);
208     if (!setP.ncar)
209       ecoMStep(Suff,pdTheta,params);
210     else
211       ecoMStepNCAR(Suff,pdTheta,params);
212     transformTheta(pdTheta,t_pdTheta,param_len,&setP);
213     //char ch;
214     //scanf(" %c", &ch );
215 
216     //if we're in the second run through of SEM
217     if (setP.sem==1) {
218       ecoSEM(optTheta, pdTheta, params, Rmat_old, Rmat);
219     }
220     else {
221       setHistory(t_pdTheta,(main_loop<=1) ? 0 : Suff[setP.suffstat_len],main_loop,(setParam*)&setP,history_full);
222     }
223 
224 
225     if (setP.verbose>=2) {
226       Rprintf("theta and suff\n");
227       if (param_len>5) {
228         Rprintf("%10g%10g%10g%10g%10g%10g%10g%10g%10g\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[5],pdTheta[6],pdTheta[7],pdTheta[8]);
229       }
230       else {
231         Rprintf("%10g%10g%10g%10g%10g (%10g)\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]));
232       }
233       Rprintf("%10g%10g%10g%10g%10g\n",Suff[0],Suff[1],Suff[2],Suff[3],Suff[4]);
234       Rprintf("Sig: %10g%10g%10g\n",setP.Sigma[0][0],setP.Sigma[1][1],setP.Sigma[0][1]);
235       if (setP.ncar) Rprintf("Sig3: %10g%10g%10g%10g\n",setP.Sigma3[0][0],setP.Sigma3[1][1],setP.Sigma3[2][2]);
236       //char x;
237       //R_ReadConsole("hit enter\n",(char*)&x,4,0);
238     }
239     main_loop++;
240     R_FlushConsole();
241     R_CheckUserInterrupt();
242   }
243 
244   /***End main loop ***/
245   //finish up: record results and loglik
246   Param* param;
247   Suff[setP.suffstat_len]=0.0;
248   for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
249   for(i=0;i<t_samp;i++) {
250      param=&(params[i]);
251     if(i<n_samp) {
252      for(j=0;j<2;j++) inSample[i*2+j]=param->caseP.W[j];
253       //setBounds(param);
254       //setNormConst(param);
255     }
256     Suff[setP.suffstat_len]+=getLogLikelihood(param);
257   }
258 
259   if (setP.verbose>=1) {
260     printColumnHeader(main_loop,*iteration_max,&setP,1);
261     Rprintf("Final Theta:");
262       for(i=0;i<param_len;i++) {
263         if (pdTheta[i]>=0) Rprintf("% 5.3f",pdTheta[i]);
264         else Rprintf(" % 5.2f",pdTheta[i]);
265       }
266       if (setP.calcLoglik==1 && main_loop>2) {
267         Rprintf(" Final LL: %5.2f",Suff[setP.suffstat_len]);
268         history_full[main_loop-1][param_len]=Suff[setP.suffstat_len];
269       }
270       Rprintf("\n");
271     }
272 
273   //set the DM matrix (only matters for SEM)
274   if (setP.sem==1) {
275     int DMlen=0;
276     for(i=0; i<param_len;i++)
277       if(setP.varParam[i]) DMlen++;
278     for(i=0;i<DMlen;i++)
279       for(j=0;j<DMlen;j++)
280         DMmatrix[i*DMlen+j]=Rmat[i][j];
281   }
282 
283   *itersUsed=main_loop;
284   for(i=0;i<(*itersUsed);i++) {
285     for(j=0;j<(param_len+1);j++)
286       history[i*(param_len+1)+j]=history_full[i][j];
287   }
288 
289 
290   /* write out the random seed */
291   PutRNGstate();
292 
293   /* Freeing the memory */
294   Free(pdTheta_old);
295   //FreeMatrix(Rmat_old,5);
296   //FreeMatrix(Rmat,5);
297   }
298 
299 /**
300  * initializes Theta, varParam, and semDone
301  * input: pdTheta_in,params
302  * mutates: params.setP, pdTheta
303  * CAR theta: mu_1, mu_2, sig_1, sig_2, rho_12
304  * NCAR theta: mu_3, mu_1, mu_2, sig_3, sig_1, sig_2, r_13, r_23, r_12
305  */
initTheta(double * pdTheta_in,Param * params,double * pdTheta)306 void initTheta(double* pdTheta_in,Param* params, double* pdTheta) {
307   setParam* setP=params[0].setP;
308   int param_len=setP->param_len;
309   int i;
310   if (!setP->ncar) {
311     for(i=0;i<param_len;i++) {
312       pdTheta[i]=pdTheta_in[i];
313       setP->varParam[i]=1;
314     }
315     if (setP->fixedRho) setP->varParam[4]=0;
316   }
317   else {
318     //constants
319     double lx,mu3sq;
320     pdTheta[0]=0; mu3sq=0;
321     for(i=0;i<setP->t_samp;i++) {
322       lx=logit(params[i].caseP.X,"initpdTheta0");
323       pdTheta[0] += lx;
324       mu3sq += lx*lx;
325     }
326     pdTheta[0] = pdTheta[0]/setP->t_samp;
327     mu3sq = mu3sq/setP->t_samp;
328     pdTheta[3] = mu3sq-pdTheta[0]*pdTheta[0]; //variance
329     //fill from pdTheta_in
330     pdTheta[1]=pdTheta_in[0];
331     pdTheta[2]=pdTheta_in[1];
332     pdTheta[4]=pdTheta_in[2];
333     pdTheta[5]=pdTheta_in[3];
334     pdTheta[6]=pdTheta_in[4];
335     pdTheta[7]=pdTheta_in[5];
336     pdTheta[8]=pdTheta_in[6];
337     for(i=0;i<param_len;i++) setP->varParam[i]=1;
338     setP->varParam[0]=0;setP->varParam[3]=0;
339     //if (setP->fixedRho) setP->varParam[8]=0;
340   }
341   int varlen=0;
342   for(i=0; i<param_len;i++)
343     if(setP->varParam[i]) varlen++;
344   for(i=0; i<varlen;i++)
345       setP->semDone[i]=0;
346 }
347 
348 /**
349   * The E-step for parametric ecological inference
350   * Takes in a Param array of length n_samp + t_samp + x0_samp + x1_samp
351   * Suff should be an array with the same length as the number of params (+1)
352   * On exit: suff holds the sufficient statistics and loglik as follows
353   * CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik
354   * NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
355  **/
356 
ecoEStep(Param * params,double * suff)357 void ecoEStep(Param* params, double* suff) {
358 
359   int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j,temp0,temp1, verbose;
360   double loglik,testdens;
361   Param* param; setParam* setP; caseParam* caseP;
362   setP=params[0].setP;
363   verbose=setP->verbose;
364 
365   t_samp=setP->t_samp;
366   n_samp=setP->n_samp;
367   x1_samp=setP->x1_samp;
368   x0_samp=setP->x0_samp;
369   s_samp=setP->s_samp;
370 
371   double **Wstar=doubleMatrix(t_samp,5);     /* pseudo data(transformed)*/
372   loglik=0;
373   if (verbose>=3 && !setP->sem) Rprintf("E-step start\n");
374   for (i = 0; i<n_samp; i++) {
375     param = &(params[i]);
376     caseP=&(param->caseP);
377     if (caseP->Y>=.990 || caseP->Y<=.010) { //if Y is near the edge, then W1 and W2 are very constrained
378       Wstar[i][0]=logit(caseP->Y,"Y maxmin W1");
379       Wstar[i][1]=logit(caseP->Y,"Y maxmin W2");
380       Wstar[i][2]=Wstar[i][0]*Wstar[i][0];
381       Wstar[i][3]=Wstar[i][0]*Wstar[i][1];
382       Wstar[i][4]=Wstar[i][1]*Wstar[i][1];
383       caseP->Wstar[0]=Wstar[i][0];
384       caseP->Wstar[1]=Wstar[i][1];
385       caseP->W[0]=caseP->Y;
386       caseP->W[1]=caseP->Y;
387       if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
388       //Rprintf("Skipping %d, Y=%5g",i,caseP->Y);
389     }
390     else {
391       setBounds(param); //I think you only have to do this once...check later
392       /*if (verbose>=2 && setP->iter==12 && i==422) {
393         Rprintf("Bounds: %5g %5g %5g %5g\n",caseP->Wbounds[0][0],caseP->Wbounds[0][1],caseP->Wbounds[1][0],caseP->Wbounds[1][1]);
394         setP->weirdness=1;
395       }
396       else setP->weirdness=0;*/
397 
398       setNormConst(param);
399       for (j=0;j<5;j++) {
400         caseP->suff=j;
401         Wstar[i][j]=paramIntegration(&SuffExp,param);
402         if (j<2)
403           caseP->Wstar[j]=Wstar[i][j];
404       }
405       caseP->suff=SS_W1;
406       caseP->W[0]=paramIntegration(&SuffExp,param);
407       caseP->suff=SS_W2;
408       caseP->W[1]=paramIntegration(&SuffExp,param);
409       caseP->suff=SS_Test;
410       testdens=paramIntegration(&SuffExp,param);
411       if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
412 
413       //report error E1 if E[W1],E[W2] is not on the tomography line
414       if (fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1]))>0.011) {
415         Rprintf("E1 %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
416         char ch;
417         scanf("Hit enter to continue %c\n", &ch );
418       }
419       //report error E2 if Jensen's inequality doesn't hold
420       if (Wstar[i][4]<pow(Wstar[i][1],2) || Wstar[i][2]<pow(Wstar[i][0],2))
421         Rprintf("E2 %d %5g %5g %5g %5g %5g %5g %5g %5g\n", i, caseP->X, caseP->Y, caseP->normcT, caseP->mu[1],Wstar[i][0],Wstar[i][1],Wstar[i][2],Wstar[i][4]);
422       //used for debugging if necessary
423       if (verbose>=2 && !setP->sem && ((i<10 && verbose>=3) || (caseP->mu[1] < -1.7 && caseP->mu[0] > 1.4)))
424         Rprintf("%d %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], param->setP->Sigma[0][1], caseP->normcT, caseP->W[0],caseP->W[1],Wstar[i][2]);
425     }
426   }
427 
428 
429   /* Use the values given by the survey data */
430   //Calculate loglik also
431   for (i=n_samp; i<n_samp+s_samp; i++) {
432     param = &(params[i]);
433     caseP=&(param->caseP);
434     Wstar[i][0]=caseP->Wstar[0];
435     Wstar[i][1]=caseP->Wstar[1];
436     Wstar[i][2]=Wstar[i][0]*Wstar[i][0];
437     Wstar[i][3]=Wstar[i][0]*Wstar[i][1];
438     Wstar[i][4]=Wstar[i][1]*Wstar[i][1];
439     if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
440   }
441 
442   /* analytically compute E{W2_i|Y_i} given W1_i, mu and Sigma in x1 homeogeneous areas */
443   for (i=n_samp+s_samp; i<n_samp+s_samp+x1_samp; i++) {
444     /*temp0=params[i].caseP.Wstar[0];
445     temp1=params[i].caseP.mu[1]+setP->Sigma[0][1]/setP->Sigma[0][0]*(temp0-params[i].caseP.mu[0]);
446     Wstar[i][0]=temp0;
447     Wstar[i][1]=temp1;
448     Wstar[i][2]=temp0*temp0;
449     Wstar[i][3]=temp0*temp1;
450     Wstar[i][4]=temp1*temp1;*/
451   }
452 
453   /*analytically compute E{W1_i|Y_i} given W2_i, mu and Sigma in x0 homeogeneous areas */
454   for (i=n_samp+s_samp+x1_samp; i<n_samp+s_samp+x1_samp+x0_samp; i++) {
455     /*temp1=params[i].caseP.Wstar[1];
456     temp0=params[i].caseP.mu[0]+setP->Sigma[0][1]/setP->Sigma[1][1]*(temp1-params[i].caseP.mu[1]);
457     Wstar[i][0]=temp0;
458     Wstar[i][1]=temp1;
459     Wstar[i][2]=temp0*temp0;
460     Wstar[i][3]=temp0*temp1;
461     Wstar[i][4]=temp1*temp1;*/
462   }
463 
464 
465   /*Calculate sufficient statistics */
466   for (j=0; j<setP->suffstat_len; j++)
467     suff[j]=0;
468 
469 
470   //CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik
471   //NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
472   /* compute sufficient statistics */
473   for (i=0; i<t_samp; i++) {
474     if (!setP->ncar) {
475       suff[0] += Wstar[i][0];  /* sumE(W_i1|Y_i) */
476       suff[1] += Wstar[i][1];  /* sumE(W_i2|Y_i) */
477       suff[2] += Wstar[i][2];  /* sumE(W_i1^2|Y_i) */
478       suff[3] += Wstar[i][4];  /* sumE(W_i2^2|Y_i) */
479       suff[4] += Wstar[i][3];  /* sumE(W_i1*W_i2|Y_i) */
480     }
481     else if (setP->ncar) {
482       double lx= logit(params[i].caseP.X,"mstep X");
483       suff[0] += lx;
484       suff[1] += Wstar[i][0];
485       suff[2] += Wstar[i][1];
486       suff[3] += lx*lx;
487       suff[4] += Wstar[i][2];
488       suff[5] += Wstar[i][4];
489       suff[6] += params[i].caseP.Wstar[0]*lx;
490       suff[7] += params[i].caseP.Wstar[1]*lx;
491       suff[8] += Wstar[i][3];
492     }
493   }
494 
495   for(j=0; j<setP->suffstat_len; j++)
496     suff[j]=suff[j]/t_samp;
497   //Rprintf("%5g suff0,2,4 %5g %5g %5g\n",setP->pdTheta[6],suff[0],suff[2],suff[4]);
498   //if(verbose>=1) Rprintf("Log liklihood %15g\n",loglik);
499   suff[setP->suffstat_len]=loglik;
500 
501   FreeMatrix(Wstar,t_samp);
502 }
503 
504 /**
505  * CAR M-Step
506  * inputs: Suff (sufficient statistics)
507  *    CAR Suff: E[W1], E[W2], E[W1^2], E[W2^2], E[W1W2]
508  * mutated (i.e., output): pdTheta, params
509  */
ecoMStep(double * Suff,double * pdTheta,Param * params)510 void ecoMStep(double* Suff, double* pdTheta, Param* params) {
511 
512   int i;
513   setParam* setP=params[0].setP;
514 
515   pdTheta[0]=Suff[0];  /*mu1*/
516   pdTheta[1]=Suff[1];  /*mu2*/
517 
518   if (setP->hypTest>0) {
519     MStepHypTest(params,pdTheta);
520   }
521 
522   if (!setP->fixedRho) { //standard
523     pdTheta[2]=Suff[2]-2*Suff[0]*pdTheta[0]+pdTheta[0]*pdTheta[0];  //sigma11
524     pdTheta[3]=Suff[3]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1];  //sigma22
525     pdTheta[4]=Suff[4]-Suff[0]*pdTheta[1]-Suff[1]*pdTheta[0]+pdTheta[0]*pdTheta[1]; //sigma12
526     pdTheta[4]=pdTheta[4]/sqrt(pdTheta[2]*pdTheta[3]); /*rho*/
527   }
528   else { //fixed rho
529 
530     double Imat[2][2];
531     Imat[0][0]=Suff[2]-2*pdTheta[0]*Suff[0]+pdTheta[0]*pdTheta[0];  //I_11
532     Imat[1][1]=Suff[3]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1];  //I_22
533     Imat[0][1]=Suff[4]-Suff[0]*pdTheta[1]-Suff[1]*pdTheta[0]+pdTheta[0]*pdTheta[1];  //I_12
534 
535     pdTheta[2]=(Imat[0][0]-pdTheta[4]*Imat[0][1]*pow(Imat[0][0]/Imat[1][1],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma11
536     pdTheta[3]=(Imat[1][1]-pdTheta[4]*Imat[0][1]*pow(Imat[1][1]/Imat[0][0],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma22
537     //sigma12 will be determined below by rho
538   }
539 
540     //set Sigma
541   setP->Sigma[0][0] = pdTheta[2];
542   setP->Sigma[1][1] = pdTheta[3];
543   setP->Sigma[0][1] = pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]);
544   setP->Sigma[1][0] = setP->Sigma[0][1];
545 
546   //if(setP->verbose>=3) Rprintf("Sigma mstep: %5g %5g %5g %5g\n",setP->Sigma[0][0],setP->Sigma[0][1],setP->Sigma[1][0],setP->Sigma[1][1]);
547   dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"regular M-step");
548 
549   /* assign each data point the new mu (same for all points) */
550   for(i=0;i<setP->t_samp;i++) {
551     params[i].caseP.mu[0]=pdTheta[0];
552     params[i].caseP.mu[1]=pdTheta[1];
553   }
554 }
555 
556 
557 /**
558  * M-Step under NCAR
559  * Input: Suff (sufficient statistics)
560  *    (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
561  * mutated (i.e., output): pdTheta, params
562  */
ecoMStepNCAR(double * Suff,double * pdTheta,Param * params)563 void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
564 
565   setParam* setP=params[0].setP;
566   //double Sigma[2][2]=setP->Sigma;
567   //double[2][2] InvSigma=setP->InvSigma;
568   //double[3][3] Sigma3=setP->Sigma3;   /* covariance matrix*/
569   //double[3][3] InvSigma3=setP->Sigma3;   /* inverse covariance matrix*/
570   int ii,i,j,verbose,t_samp;
571   verbose=setP->verbose;
572   t_samp=setP->t_samp;
573 
574 
575   //set E[XW*]
576   double XW1=Suff[6];
577   double XW2=Suff[7];
578 
579 
580 
581   //for(i = 0;i<9; i++) Rprintf("%f5.2\n",pdTheta[i]);
582   if (!setP->fixedRho) { //variable rho
583 
584 
585     //pdTheta[0] is const
586     pdTheta[1]=Suff[1];  /*mu1*/
587     pdTheta[2]=Suff[2];  /*mu2*/
588 
589     //set variances and correlations
590     //pdTheta[3] is const
591     pdTheta[4]=Suff[4]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1]; //s11
592     pdTheta[5]=Suff[5]-2*Suff[2]*pdTheta[2]+pdTheta[2]*pdTheta[2]; //s22
593     pdTheta[6]=(XW1 - pdTheta[0]*Suff[1])/sqrt((Suff[4] - Suff[1]*Suff[1])*pdTheta[3]); //rho_13
594     pdTheta[7]=(XW2 - pdTheta[0]*Suff[2])/sqrt((Suff[5] - Suff[2]*Suff[2])*pdTheta[3]); //rho_23
595     pdTheta[8]=Suff[8]-Suff[1]*pdTheta[2]-Suff[2]*pdTheta[1]+pdTheta[1]*pdTheta[2]; //sigma12
596     pdTheta[8]=pdTheta[8]/sqrt(pdTheta[4]*pdTheta[5]); //rho_12
597 
598 
599     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
600     //variances
601     setP->Sigma3[0][0] = pdTheta[4];
602     setP->Sigma3[1][1] = pdTheta[5];
603     setP->Sigma3[2][2] = pdTheta[3];
604 
605     //covariances
606     setP->Sigma3[0][1] = pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]);
607     setP->Sigma3[0][2] = pdTheta[6]*sqrt(pdTheta[4]*pdTheta[3]);
608     setP->Sigma3[1][2] = pdTheta[7]*sqrt(pdTheta[5]*pdTheta[3]);
609 
610     //symmetry
611     setP->Sigma3[1][0] = setP->Sigma3[0][1];
612     setP->Sigma3[2][0] = setP->Sigma3[0][2];
613     setP->Sigma3[2][1] = setP->Sigma3[1][2];
614               //if (verbose>=2) {
615             //Rprintf("Sigma3: %5g %5g %5g %5g %5g\n",setP->Sigma3[0][0],setP->Sigma3[0][1],setP->Sigma3[1][1],setP->Sigma3[1][2],setP->Sigma3[2][2]);
616           //}
617 
618   }
619   else { //fixed rho
620     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
621 
622     ncarFixedRhoTransform(pdTheta); //need the fixed param (pdTheta[8]) to be the conditional correlation
623 
624     //CODE BLOCK D
625     //compute beta based on previous sigma
626     //beta is mu1,beta1,mu2,beta, which are pdTheta 1,2,6,7
627     double **InvSigma=doubleMatrix(2,2);
628     double **Zmat=doubleMatrix(4,2);
629     double **Zmat_t=doubleMatrix(2,4);
630     double **tmp41=doubleMatrix(4,1);
631     double **tmp42=doubleMatrix(4,2);
632     double **tmp44=doubleMatrix(4,4);
633     double **tmp21=doubleMatrix(2,1);
634     double **denom=doubleMatrix(4,4);
635     double **numer=doubleMatrix(4,1);
636     for (i=0;i<4;i++) {
637       for(j=0;j<4;j++) {
638         if (j<2) {
639           if (i<2) InvSigma[i][j]=setP->InvSigma[i][j];
640           Zmat[i][j]=0; Zmat_t[j][i]=0;
641         }
642         denom[i][j]=0;
643       }
644       numer[i][0]=0;
645     }
646     //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
647     for(ii=0;ii<setP->t_samp;ii++) {
648         double lx=logit(params[ii].caseP.X,"NCAR beta");
649         for(j=0;j<2;j++) {
650           Zmat_t[j][j*2+1]=lx - pdTheta[0];
651           Zmat_t[j][j*2]=1;
652           Zmat[j*2+1][j]=lx - pdTheta[0];
653           Zmat[j*2][j]=1;
654         }
655         matrixMul(Zmat,InvSigma,4,2,2,2,tmp42);
656         matrixMul(tmp42,Zmat_t,4,2,2,4,tmp44);
657         for (i=0;i<4;i++)
658           for(j=0;j<4;j++)
659             denom[i][j]+=tmp44[i][j];
660         //for (i=0;i<2;i++) tmp21[i][0]=(params[ii].caseP.Wstar[i] - pdTheta[i+1]); //Wtilde ??
661         for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
662         //matrixMul(Zmat,InvSigma,4,2,2,2,tmp42);  //no need to repeat calculation
663         matrixMul(tmp42,tmp21,4,2,2,1,tmp41);
664         for (i=0;i<4;i++) numer[i][0]+=tmp41[i][0];
665     }
666     dinv(denom,4,denom);
667     matrixMul(denom,numer,4,4,4,1,numer);
668 
669     pdTheta[1]=numer[0][0]; //mu1
670     pdTheta[6]=numer[1][0]; //beta1
671     pdTheta[2]=numer[2][0]; //mu2
672     pdTheta[7]=numer[3][0]; //beta2
673     //pdTheta[8] is constant
674     //Rprintf("Compare Suff1 %5g to pdT1 %5g \n",Suff[1],pdTheta[1]);
675     //Rprintf("Compare Suff2 %5g to pdT2 %5g \n",Suff[2],pdTheta[2]);
676 
677     if (setP->hypTest>0) {
678       MStepHypTest(params,pdTheta);
679     }
680 
681     //CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik
682     //NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
683     //0->1, 1->2, 2->4, 3->5, 4->8
684 
685 
686     //CODE BLOCK C
687     //Compute sigma conditional on beta
688     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
689     double Smat[2][2]; //the S matrix (divided by n) in the paper
690     double Tmat[2][2]; //the T matrix (divided by n) in the paper
691     double S1=Suff[1]; //S_1 = Sufficient stat of W1* - beta1 * (sum of [(X_i - \mu3)]) ; second term goes to zero
692     double S2=Suff[2]; //S_2 =  Sufficient stat of W2*
693 
694     Smat[0][0]=Suff[4] - 2*pdTheta[6]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[6]*pdTheta[3];  //S_11
695     Smat[1][1]=Suff[5] - 2*pdTheta[7]*(XW2 - pdTheta[0]*Suff[2]) + pdTheta[7]*pdTheta[7]*pdTheta[3];  //S_22
696     Smat[0][1]=Suff[8] - pdTheta[6]*(XW2 - pdTheta[0]*Suff[2]) - pdTheta[7]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[7]*pdTheta[3] ;  //S_12
697 
698     Tmat[0][0]=Smat[0][0] - S1*S1;
699     Tmat[1][1]=Smat[1][1] - S2*S2;
700     Tmat[0][1]=Smat[0][1] - S1*S2;
701 
702     pdTheta[4]=(Tmat[0][0]-pdTheta[8]*Tmat[0][1]*pow(Tmat[0][0]/Tmat[1][1],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma11 | 3
703     pdTheta[5]=(Tmat[1][1]-pdTheta[8]*Tmat[0][1]*pow(Tmat[1][1]/Tmat[0][0],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma22 | 3
704 
705     //variances
706     //CODE BLOCK B
707     setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3];
708     setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3];
709     setP->Sigma3[2][2] = pdTheta[3];
710 
711     //covariances
712     setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/
713                           (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional
714     setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12
715     setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]);
716     setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]);
717 
718     //symmetry
719     setP->Sigma3[1][0] = setP->Sigma3[0][1];
720     setP->Sigma3[2][0] = setP->Sigma3[0][2];
721     setP->Sigma3[2][1] = setP->Sigma3[1][2];
722   }
723   dinv2D((double*)(&(setP->Sigma3[0][0])), 3, (double*)(&(setP->InvSigma3[0][0])),"NCAR M-step S3");
724   initNCAR(params,pdTheta);
725   if (setP->fixedRho) ncarFixedRhoUnTransform(pdTheta);
726 }
727 
728 /**
729  * M-Step under CCAR
730  * Input: params
731  * mutated (i.e., output): pdTheta, params
732  */
ecoMStepCCAR(double * pdTheta,Param * params)733 void ecoMStepCCAR(double* pdTheta, Param* params) {
734   setParam* setP=params[0].setP;
735   int k=setP->ccar_nvar;
736   int ii,i,j,verbose,t_samp;
737   verbose=setP->verbose;
738   t_samp=setP->t_samp;
739   double **InvSigma=doubleMatrix(2,2);
740   double **Z_i=doubleMatrix(k,2);
741   double **Z_i_t=doubleMatrix(2,k);
742   double **tmpk1=doubleMatrix(k,1);
743   double **tmpk2=doubleMatrix(k,2);
744   double **tmpkk=doubleMatrix(k,k);
745   double **tmp21=doubleMatrix(2,1);
746   double **tmp21_b=doubleMatrix(2,1);
747   double **tmp12=doubleMatrix(1,2);
748   double **tmp22=doubleMatrix(2,2);
749   double **denom=doubleMatrix(k,k);
750   double **numer=doubleMatrix(k,1);
751   //betas
752   for (i=0;i<k;i++) {
753     for(j=0;j<k;j++) {
754       if (j<2) {
755         if (i<2) InvSigma[i][j]=setP->InvSigma[i][j];
756       }
757       denom[i][j]=0;
758     }
759     numer[i][0]=0;
760   }
761   //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
762   for(ii=0;ii<setP->t_samp;ii++) {
763     for (i=0;i<k;i++) {
764       for(j=0;j<k;j++) {
765         Z_i[i][j]=params[ii].caseP.Z_i[i][j];
766         Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
767       }
768     }
769       matrixMul(Z_i,InvSigma,k,2,2,2,tmpk2);
770       matrixMul(tmpk2,Z_i_t,k,2,2,k,tmpkk);
771       for (i=0;i<k;i++)
772         for(j=0;j<k;j++)
773           denom[i][j]+=tmpkk[i][j];
774       for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
775       matrixMul(tmpk2,tmp21,k,2,2,1,tmpk1);
776       for (i=0;i<k;i++) numer[i][0]+=tmpk1[i][0];
777   }
778   dinv(denom,k,denom);
779   matrixMul(denom,numer,k,k,k,1,numer);
780   for(i=0; i<k;i++) pdTheta[i]=numer[i][0]; //betas
781 
782 
783   if (setP->hypTest>0) {
784     MStepHypTest(params,pdTheta);
785   }
786 
787   //conditional Sigma
788   //start at 0
789   for(i=0; i<2;i++)
790     for(j=0; j<2;j++)
791       setP->Sigma[i][j] = 0;
792 
793 
794   for(ii=0;ii<setP->t_samp;ii++) {
795     for (i=0;i<k;i++) {
796       for(j=0;j<k;j++) {
797         Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
798       }
799     }
800     matrixMul(Z_i_t,numer,2,k,k,1,tmp21_b);
801     for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
802     for (i=0;i<2;i++) tmp21[i][0] = tmp21[i][0] - tmp21_b[i][0]; //Wstar - Z_t*B
803     for (i=0;i<2;i++) tmp12[0][i] = tmp21[i][0]; //invserse
804     matrixMul(tmp21,tmp12,2,1,1,2,tmp22);
805     for(i=0; i<2;i++)
806       for(j=0; j<2;j++)
807         setP->Sigma[i][j] += tmp22[i][j];
808   }
809   dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"CCAR M-step S2");
810 
811   //variances
812   //CODE BLOCK B
813   setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3];
814   setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3];
815   setP->Sigma3[2][2] = pdTheta[3];
816 
817   //covariances
818   setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/
819                         (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional
820   setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12
821   setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]);
822   setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]);
823 
824   //symmetry
825   setP->Sigma3[1][0] = setP->Sigma3[0][1];
826   setP->Sigma3[2][0] = setP->Sigma3[0][2];
827   setP->Sigma3[2][1] = setP->Sigma3[1][2];
828 
829   dinv2D((double*)(&(setP->Sigma3[0][0])), 3, (double*)(&(setP->InvSigma3[0][0])),"NCAR M-step S3");
830   initNCAR(params,pdTheta);
831 
832 }
833 
834 /**
835  * Exta M-Step for hypothesis testing
836  * Input: params
837  * Mutates pdTheta
838  */
MStepHypTest(Param * params,double * pdTheta)839 void MStepHypTest(Param* params, double* pdTheta) {
840   setParam* setP=params[0].setP;
841   double offset,denom;
842   int dim,i,j,l,k;
843   dim=setP->ncar ? 3 : 2;
844   l=setP->hypTest;
845   double** Sigma=doubleMatrix(dim,dim);
846   double** temp_LbyD=doubleMatrix(l,dim);
847   double** temp_DbyL=doubleMatrix(dim,l);
848   double** temp_LbyL=doubleMatrix(l,l);
849 
850   for(i=0;i<dim;i++)
851     for(j=0;j<dim;j++) {
852       if (dim==3) {
853         Sigma[i][j]=setP->Sigma3[i][j];
854       }
855       else {
856         Sigma[i][j]=setP->Sigma[i][j];
857       }
858     }
859   //transpose
860   double** hypTestCoeffT=doubleMatrix(l,dim);
861   for(i=0;i<dim;i++) hypTestCoeffT[0][i]=setP->hypTestCoeff[i][0];
862 
863   //numerator
864   for(k=0;k<2;k++) temp_DbyL[k][0]=0;
865   for(i=0;i<setP->t_samp;i++) {
866     temp_DbyL[0][0]+=params[i].caseP.Wstar[0];
867     temp_DbyL[1][0]+=params[i].caseP.Wstar[1];
868   }
869   matrixMul(hypTestCoeffT,temp_DbyL,l,dim,dim,l,temp_LbyL);
870   temp_LbyL[0][0]=temp_LbyL[0][0]-(setP->t_samp*setP->hypTestResult);
871   matrixMul(Sigma,setP->hypTestCoeff,dim,dim,dim,l,temp_DbyL);
872   for(k=0;k<2;k++) temp_DbyL[k][0]*=temp_LbyL[0][0];
873 
874   //denominator
875   //matrixMul(hypTestCoeffT,InvSigma,l,dim,dim,dim,temp_LbyD);
876   matrixMul(hypTestCoeffT,Sigma,l,dim,dim,dim,temp_LbyD);
877   matrixMul(temp_LbyD,setP->hypTestCoeff,l,dim,dim,l,temp_LbyL);
878   denom=setP->t_samp*temp_LbyL[0][0];
879 
880   //offset theta
881   for(k=0;k<2;k++) {
882     offset=temp_DbyL[k][0]/denom;
883     int kindex= (setP->ncar) ? (k+1) : k;
884     pdTheta[kindex]=pdTheta[kindex]-offset;
885   }
886 
887 }
888 
889 
890 /**
891  * NCAR initialize
892  * note that for fixed rho, the input is the UNTRANSFORMED PARAMETERS
893  * input: pdTheta
894  * mutates: params
895  */
initNCAR(Param * params,double * pdTheta)896 void initNCAR(Param* params, double* pdTheta) {
897   setParam* setP=params[0].setP;
898   int i;
899   if (!setP->fixedRho) { //variable rho
900     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
901 
902     setP->Sigma[0][0]= pdTheta[4]*(1 - pdTheta[6]*pdTheta[6]);
903     setP->Sigma[1][1]= pdTheta[5]*(1 - pdTheta[7]*pdTheta[7]);
904     setP->Sigma[0][1]= (pdTheta[8] - pdTheta[6]*pdTheta[7])/sqrt((1 - pdTheta[6]*pdTheta[6])*(1 - pdTheta[7]*pdTheta[7])); //correlation
905     setP->Sigma[0][1]= setP->Sigma[0][1]*sqrt(setP->Sigma[0][0]*setP->Sigma[1][1]); //covar
906     setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry
907     dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2");
908 
909     //assign each data point the new mu (different for each point)
910     for(i=0;i<setP->t_samp;i++) {
911       params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*sqrt(pdTheta[4]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
912       params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*sqrt(pdTheta[5]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]);
913       if(setP->verbose>=2 && !setP->sem && (i<3 || i==422))
914       //if(setP->verbose>=2  && i<3)
915         Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
916     }
917   }
918   else { //fixed rho
919     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
920     //CODE BLOCK A
921     setP->Sigma[0][0]= pdTheta[4];
922     setP->Sigma[1][1]= pdTheta[5];
923     setP->Sigma[0][1]= pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]); //covar
924     setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry
925     dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2");
926 
927     for(i=0;i<setP->t_samp;i++) {
928       params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
929       params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]);
930       if(setP->verbose>=2 && !setP->sem && (i<3 || i==422))
931       //if(setP->verbose>=2  && i<3)
932         Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
933     }
934 
935   }
936 }
937 
938 /**
939  * CCAR initialize
940  * Note that fixed rho is currently unimplemented
941  * input: pdTheta
942  * mutates: params
943  */
initCCAR(Param * params,double * pdTheta)944 void initCCAR(Param* params, double* pdTheta) {
945   setParam* setP=params[0].setP;
946   int i;
947   if (!setP->fixedRho) { //variable rho
948     //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
949 
950     setP->Sigma[0][0]= pdTheta[4]*(1 - pdTheta[6]*pdTheta[6]);
951     setP->Sigma[1][1]= pdTheta[5]*(1 - pdTheta[7]*pdTheta[7]);
952     setP->Sigma[0][1]= (pdTheta[8] - pdTheta[6]*pdTheta[7])/sqrt((1 - pdTheta[6]*pdTheta[6])*(1 - pdTheta[7]*pdTheta[7])); //correlation
953     setP->Sigma[0][1]= setP->Sigma[0][1]*sqrt(setP->Sigma[0][0]*setP->Sigma[1][1]); //covar
954     setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry
955     dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2");
956     //assign each data point the new mu (different for each point)
957     for(i=0;i<setP->t_samp;i++) {
958       params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*sqrt(pdTheta[4]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
959       params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*sqrt(pdTheta[5]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]);
960       if(setP->verbose>=2 && !setP->sem && (i<3 || i==422))
961       //if(setP->verbose>=2  && i<3)
962         Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
963     }
964   }
965   else { //fixed rho
966   }
967 }
968 
969 /**
970  * input: optTheta,pdTheta,params,Rmat
971  * mutate/output: matrices Rmat and Rmat_old (dimensions of param_len x param_len)
972  * optTheta is optimal theta
973  * pdTheta is current theta
974  * Rmat_old contains the input Rmat
975  */
ecoSEM(double * optTheta,double * pdTheta,Param * params,double Rmat_old[7][7],double Rmat[7][7])976  void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7][7], double Rmat[7][7]) {
977   //assume we have optTheta, ie \hat{phi}
978   //pdTheta is phi^{t+1}
979   int i,j,verbose,len,param_len;
980   setParam setP_sem=*(params[0].setP);
981   param_len=setP_sem.param_len;
982   double *SuffSem=doubleArray(setP_sem.suffstat_len+1); //sufficient stats
983   double phiTI[param_len]; //phi^t_i
984   double phiTp1I[param_len]; //phi^{t+1}_i
985   double t_optTheta[param_len]; //transformed optimal
986   double t_phiTI[param_len]; //transformed phi^t_i
987   double t_phiTp1I[param_len]; //transformed phi^{t+1}_i
988   Param* params_sem=(Param*) Calloc(params->setP->t_samp,Param);
989   verbose=setP_sem.verbose;
990   //determine length of R matrix
991   len=0;
992   for(j=0; j<param_len;j++)
993     if(setP_sem.varParam[j]) len++;
994 
995   //first, save old Rmat
996   for(i=0;i<len;i++)
997     for(j=0;j<len;j++)
998       Rmat_old[i][j]=Rmat[i][j];
999 
1000   for(i=0;i<len;i++) {
1001     if (!setP_sem.semDone[i]) { //we're not done with this row
1002       //step 1: set phi^t_i
1003       if (verbose>=2) Rprintf("Theta(%d):",(i+1));
1004       int switch_index_ir=0; int switch_index_it;
1005       for(j=0;j<param_len;j++) {
1006         if (!setP_sem.varParam[j]) //const
1007           phiTI[j]=optTheta[j];
1008         else {
1009           if (i==switch_index_ir) {
1010             phiTI[j]=pdTheta[j]; //current value
1011             switch_index_it=j;
1012           }
1013           else phiTI[j]=optTheta[j]; //optimal value
1014           switch_index_ir++;
1015         }
1016         if (verbose>=2) Rprintf(" %5g ", phiTI[j]);
1017       }
1018       //if (setP_sem.fixedRho) {
1019       //  phiTI[len-1]=pdTheta[len-1];
1020       //  phiTp1I[len-1]=pdTheta[len-1];
1021       // if (verbose>=2) Rprintf(" %5g ", phiTI[len-1]);
1022       //}
1023       if (verbose>=2) Rprintf("\n");
1024       for(j=0;j<param_len;j++) phiTp1I[j]=phiTI[j]; //init next iteration
1025 
1026       //step 2: run an E-step and an M-step with phi^t_i
1027       //initialize params
1028       if (!setP_sem.ncar) {
1029         for(j=0;j<setP_sem.t_samp;j++) {
1030           params_sem[j].setP=&setP_sem;
1031           params_sem[j].caseP=params[j].caseP;
1032           params_sem[j].caseP.mu[0] = phiTI[0];
1033           params_sem[j].caseP.mu[1] = phiTI[1];
1034         }
1035         setP_sem.Sigma[0][0] = phiTI[2];
1036         setP_sem.Sigma[1][1] = phiTI[3];
1037         setP_sem.Sigma[0][1] = phiTI[4]*sqrt(phiTI[2]*phiTI[3]);
1038         setP_sem.Sigma[1][0] = setP_sem.Sigma[0][1];
1039         dinv2D((double*)(&(setP_sem.Sigma[0][0])), 2, (double*)(&(setP_sem.InvSigma[0][0])), "SEM: CAR init ");
1040       }
1041       else {
1042         for(j=0;j<setP_sem.t_samp;j++) {
1043           params_sem[j].setP=&setP_sem;
1044           params_sem[j].caseP=params[j].caseP;
1045         }
1046         setP_sem.Sigma3[0][0] = phiTI[4];
1047         setP_sem.Sigma3[1][1] = phiTI[5];
1048         setP_sem.Sigma3[2][2] = phiTI[3];
1049 
1050         //covariances
1051         setP_sem.Sigma3[0][1] = phiTI[8]*sqrt(phiTI[4]*phiTI[5]);
1052         setP_sem.Sigma3[0][2] = phiTI[6]*sqrt(phiTI[4]*phiTI[3]);
1053         setP_sem.Sigma3[1][2] = phiTI[7]*sqrt(phiTI[5]*phiTI[3]);
1054 
1055         //symmetry
1056         setP_sem.Sigma3[1][0] = setP_sem.Sigma3[0][1];
1057         setP_sem.Sigma3[2][0] = setP_sem.Sigma3[0][2];
1058         setP_sem.Sigma3[2][1] = setP_sem.Sigma3[1][2];
1059         if (verbose>=2) {
1060           Rprintf("Sigma3: %5g %5g %5g %5g %5g %5g; %5g %5g\n",setP_sem.Sigma3[0][0],setP_sem.Sigma3[0][1],setP_sem.Sigma3[1][1],setP_sem.Sigma3[0][2],setP_sem.Sigma3[1][2],setP_sem.Sigma3[2][2],*(&(setP_sem.Sigma3[0][0])+0),*(&(setP_sem.Sigma3[0][0])+8));
1061         }
1062         dinv2D((double*)(&(setP_sem.Sigma3[0][0])), 3, (double*)(&(setP_sem.InvSigma3[0][0])),"SEM: NCAR Sig3 init");
1063         if (verbose>=2) {
1064           Rprintf("Check 1");
1065         }
1066         if (setP_sem.fixedRho) ncarFixedRhoTransform(phiTI);
1067         initNCAR(params_sem,phiTI);
1068         if (setP_sem.fixedRho) ncarFixedRhoUnTransform(phiTI);
1069         if (verbose>=2) {
1070           Rprintf("Check 2");
1071         }
1072       }
1073 
1074       //if (verbose>=2) {
1075       //  Rprintf("Sigma: %5g %5g %5g %5g\n",setP_sem.Sigma[0][0],setP_sem.Sigma[0][1],setP_sem.Sigma[1][0],setP_sem.Sigma[1][1]);
1076       //}
1077 
1078       ecoEStep(params_sem, SuffSem);
1079       if (!params[0].setP->ncar)
1080         ecoMStep(SuffSem,phiTp1I,params_sem);
1081       else
1082         ecoMStepNCAR(SuffSem,phiTp1I,params_sem);
1083 
1084       //step 3: create new R matrix row
1085       transformTheta(phiTp1I,t_phiTp1I,setP_sem.param_len,&setP_sem);
1086       transformTheta(optTheta,t_optTheta,setP_sem.param_len,&setP_sem);
1087       transformTheta(phiTI,t_phiTI,setP_sem.param_len,&setP_sem);
1088       /*if (verbose>=2) {
1089         Rprintf("T+1:");
1090         for (j=0;j<param_len;j++) Rprintf(" %5g ", phiTp1I[j]);
1091         Rprintf("\nOpt:");
1092         for (j=0;j<param_len;j++) Rprintf(" %5g ", optTheta[j]);
1093         Rprintf("\n 2nd item: %5g %5g %5g %5g", t_phiTp1I[2], t_optTheta[2], t_phiTI[switch_index_it], t_optTheta[switch_index_it]);
1094       }*/
1095       int index_jr=0;
1096       for(j = 0; j<param_len; j++) {
1097         if (setP_sem.varParam[j]) {
1098           Rmat[i][index_jr]=(t_phiTp1I[j]-t_optTheta[j])/(t_phiTI[switch_index_it]-t_optTheta[switch_index_it]);
1099           index_jr++;
1100         }
1101       }
1102 
1103       //step 4: check for difference
1104       params[0].setP->semDone[i]=closeEnough((double*)Rmat[i],(double*)Rmat_old[i],len,sqrt(params[0].setP->convergence));
1105 
1106     }
1107     else { //keep row the same
1108       for(j = 0; j<len; j++)
1109         Rmat[i][j]=Rmat_old[i][j];
1110     }
1111   }
1112   if(verbose>=1) {
1113     for(i=0;i<len;i++) {
1114       Rprintf("\nR Matrix row %d (%s): ", (i+1), (params[0].setP->semDone[i]) ? "    Done" : "Not done");
1115       for(j=0;j<len;j++) {
1116         Rprintf(" %5.2f ",Rmat[i][j]);
1117       }
1118     }
1119     Rprintf("\n\n");
1120   }
1121   Free(SuffSem);
1122   Free(params_sem);
1123 }
1124 
1125 
1126 
1127 /**
1128  * Read in the data set and population params
1129  * inputs:
1130  *   ndim: number of dimensions
1131  *   pdX: non-survey, non-homogenous data (length n_samp)
1132  *   sur_W: survey data (length s_samp)
1133  *   x1_W1: homogenous data (X==1) (length x1_samp)
1134  *   x0_W2: homogenous data (X==0) (length x0_samp)
1135  * mutates: params
1136  */
readData(Param * params,int n_dim,double * pdX,double * sur_W,double * x1_W1,double * x0_W2,int n_samp,int s_samp,int x1_samp,int x0_samp)1137  void readData(Param* params, int n_dim, double* pdX, double* sur_W, double* x1_W1, double* x0_W2,
1138                 int n_samp, int s_samp, int x1_samp, int x0_samp) {
1139      /* read the data set */
1140   int itemp,i,j,surv_dim;
1141   double dtemp;
1142   setParam* setP=params[0].setP;
1143 
1144   /** Packing Y, X  **/
1145   itemp = 0;
1146   for (j = 0; j < n_dim; j++)
1147     for (i = 0; i < n_samp; i++) {
1148       params[i].caseP.data[j] = pdX[itemp++];
1149     }
1150 
1151   for (i = 0; i < n_samp; i++) {
1152     params[i].caseP.dataType=DPT_General;
1153     params[i].caseP.X=params[i].caseP.data[0];
1154     params[i].caseP.Y=params[i].caseP.data[1];
1155     //fix X edge cases
1156     params[i].caseP.X=(params[i].caseP.X >= 1) ? .9999 : ((params[i].caseP.X <= 0) ? 0.0001 : params[i].caseP.X);
1157     //fix Y edge cases
1158     params[i].caseP.Y=(params[i].caseP.Y >= 1) ? .9999 : ((params[i].caseP.Y <= 0) ? 0.0001 : params[i].caseP.Y);
1159   }
1160 
1161   /*read the survey data */
1162   itemp=0;
1163   surv_dim=n_dim + (setP->ncar ? 1 : 0); //if NCAR, the survey data will include X's
1164   for (j=0; j<surv_dim; j++) {
1165     for (i=n_samp; i<n_samp+s_samp; i++) {
1166       dtemp=sur_W[itemp++];
1167       params[i].caseP.dataType=DPT_Survey;
1168       if (j<n_dim) {
1169         params[i].caseP.W[j]=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
1170         params[i].caseP.Wstar[j]=logit(params[i].caseP.W[j],"Survey read");
1171       }
1172       else { //if given the X (NCAR), we set the X and contruct Y
1173         params[i].caseP.X=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
1174         params[i].caseP.Y=params[i].caseP.X*params[i].caseP.W[0]+(1-params[i].caseP.X);
1175       }
1176     }
1177   }
1178 
1179   /*read homeogenous areas information */
1180   for (i=n_samp+s_samp; i<n_samp+s_samp+x1_samp; i++) {
1181     /*params[i].caseP.dataType=DPT_Homog_X1;
1182     params[i].caseP.W[0]=(x1_W1[i] == 1) ? .9999 : ((x1_W1[i]==0) ? .0001 : x1_W1[i]);
1183     params[i].caseP.Wstar[0]=logit(params[i].caseP.W[0],"X1 read");*/
1184   }
1185 
1186   for (i=n_samp+s_samp+x1_samp; i<n_samp+s_samp+x1_samp+x0_samp; i++) {
1187     /*params[i].caseP.dataType=DPT_Homog_X0;
1188     params[i].caseP.W[1]=(x0_W2[i] == 1) ? .9999 : ((x0_W2[i]==0) ? .0001 : x0_W2[i]);
1189     params[i].caseP.Wstar[1]=logit(params[i].caseP.W[1],"X0 read");*/
1190   }
1191 
1192   /*Current version of program does not handle homogenous data*/
1193   if ((x1_samp+x0_samp)>0) {
1194     Rprintf("WARNING: Homogenous data is ignored and not handled by the current version of eco.");
1195   }
1196 
1197 
1198   if (setP->verbose>=2) {
1199     Rprintf("Y X\n");
1200     for(i=0;i<5;i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
1201     if (s_samp>0) {
1202       Rprintf("SURVEY data\nY X\n");
1203       int s_max=fmin2(n_samp+x1_samp+x0_samp+s_samp,n_samp+x1_samp+x0_samp+5);
1204       for(i=n_samp+x1_samp+x0_samp; i<s_max; i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
1205     }
1206   }
1207 
1208 }
1209 
1210 /**
1211  * During the main E-M loop, this function prints the output column headers
1212  * finalTheta: 1 if this is for the final theta -- include static variables
1213  **/
printColumnHeader(int main_loop,int iteration_max,setParam * setP,int finalTheta)1214 void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta) {
1215   int i;
1216   int param_len;
1217   param_len = setP->param_len;
1218 
1219   //trying to print nicely, but it throws an error
1220   //char temp[50]; int hlen;
1221   //if (!finalTheta) hlen=sprintf(temp, "cycle %d/%d:",main_loop,iteration_max); //Length of cycle text
1222   //else hlen=sprintf(temp, "Final Theta:");
1223   //for (i=0;i<hlen;i++) Rprintf(" ");
1224 
1225   if (!finalTheta) Rprintf("cycle %d/%d:",main_loop,iteration_max);
1226   else Rprintf("Final Theta:");
1227 
1228   if (param_len<=5) { //CAR
1229     Rprintf("  mu_1  mu_2 sig_1 sig_2");
1230     if (!setP->fixedRho || finalTheta) Rprintf("  r_12");
1231   } else { //NCAR
1232     if (finalTheta) {
1233       Rprintf("  mu_3  mu_1  mu_2 sig_3 sig_1 sig_2  r_13  r_23  r_12");
1234     }
1235     else {
1236       Rprintf("  mu_1  mu_2 sig_1 sig_2  r_13  r_23  r_12");
1237     }
1238   }
1239   Rprintf("\n");
1240 }
1241 
1242 /**
1243  * Parameterizes the elements of theta
1244  * Input: pdTheta
1245  * Mutates: t_pdTheta
1246  */
transformTheta(double * pdTheta,double * t_pdTheta,int len,setParam * setP)1247 void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP) {
1248   if (len<=5) {
1249     t_pdTheta[0]=pdTheta[0];
1250     t_pdTheta[1]=pdTheta[1];
1251     t_pdTheta[2]=log(pdTheta[2]);
1252     t_pdTheta[3]=log(pdTheta[3]);
1253     t_pdTheta[4]=.5*(log(1+pdTheta[4])-log(1-pdTheta[4]));
1254   }
1255   else {
1256     t_pdTheta[0]=pdTheta[0];
1257     t_pdTheta[1]=pdTheta[1];
1258     t_pdTheta[2]=pdTheta[2];
1259     t_pdTheta[3]=log(pdTheta[3]);
1260     t_pdTheta[4]=log(pdTheta[4]);
1261     t_pdTheta[5]=log(pdTheta[5]);
1262     t_pdTheta[6]=.5*(log(1+pdTheta[6])-log(1-pdTheta[6]));
1263     t_pdTheta[7]=.5*(log(1+pdTheta[7])-log(1-pdTheta[7]));
1264     t_pdTheta[8]=.5*(log(1+pdTheta[8])-log(1-pdTheta[8]));
1265   }
1266 }
1267 
1268 /**
1269  * Un-parameterizes the elements of theta
1270  * Input: t_pdTheta
1271  * Mutates: pdTheta
1272  */
untransformTheta(double * t_pdTheta,double * pdTheta,int len,setParam * setP)1273 void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP) {
1274   if (len<=5) {
1275     pdTheta[0]=t_pdTheta[0];
1276     pdTheta[1]=t_pdTheta[1];
1277     pdTheta[2]=exp(t_pdTheta[2]);
1278     pdTheta[3]=exp(t_pdTheta[3]);
1279     pdTheta[4]=(exp(2*t_pdTheta[4])-1)/(exp(2*t_pdTheta[4])+1);
1280   }
1281   else {
1282     pdTheta[0]=t_pdTheta[0];
1283     pdTheta[1]=t_pdTheta[1];
1284     pdTheta[2]=t_pdTheta[2];
1285     pdTheta[3]=exp(t_pdTheta[3]);
1286     pdTheta[4]=exp(t_pdTheta[4]);
1287     pdTheta[5]=exp(t_pdTheta[5]);
1288     if (!setP->fixedRho) {
1289       pdTheta[6]=(exp(2*t_pdTheta[6])-1)/(exp(2*t_pdTheta[6])+1);
1290       pdTheta[7]=(exp(2*t_pdTheta[7])-1)/(exp(2*t_pdTheta[7])+1);
1291     }
1292     else {
1293       pdTheta[6]=t_pdTheta[6];
1294       pdTheta[7]=t_pdTheta[7];
1295     }
1296     pdTheta[8]=(exp(2*t_pdTheta[8])-1)/(exp(2*t_pdTheta[8])+1);
1297   }
1298 }
1299 
1300 /**
1301  * untransforms theta under ncar -- fixed rho
1302  * input reference:  (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
1303  * output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
1304  * mutates: pdTheta
1305  **/
ncarFixedRhoUnTransform(double * pdTheta)1306 void ncarFixedRhoUnTransform(double* pdTheta) {
1307   double* tmp=doubleArray(9);
1308   int i;
1309   for (i=0;i<9;i++) tmp[i]=pdTheta[i];
1310   pdTheta[0]=tmp[0];
1311   pdTheta[1]=tmp[1];
1312   pdTheta[2]=tmp[2];
1313   pdTheta[3]=tmp[3];
1314   pdTheta[4]=tmp[4] + tmp[6]*tmp[6]*tmp[3];
1315   pdTheta[5]=tmp[5] + tmp[7]*tmp[7]*tmp[3];
1316   pdTheta[6]=(tmp[6]*sqrt(tmp[3]))/(sqrt(pdTheta[4]));
1317   pdTheta[7]=(tmp[7]*sqrt(tmp[3]))/(sqrt(pdTheta[5]));
1318   pdTheta[8]=(tmp[8]*sqrt(tmp[4]*tmp[5]) + tmp[6]*tmp[7]*tmp[3])/(sqrt(pdTheta[4]*pdTheta[5]));
1319   Free(tmp);
1320 }
1321 
1322 /**
1323  * transforms theta under ncar -- fixed rho
1324  * input reference:  (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
1325  * output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
1326  * mutates: pdTheta
1327  **/
ncarFixedRhoTransform(double * pdTheta)1328 void ncarFixedRhoTransform(double* pdTheta) {
1329   double* tmp=doubleArray(9);
1330   int i;
1331   for (i=0;i<9;i++) tmp[i]=pdTheta[i];
1332   pdTheta[0]=tmp[0];
1333   pdTheta[1]=tmp[1];
1334   pdTheta[2]=tmp[2];
1335   pdTheta[3]=tmp[3];
1336   pdTheta[4]=tmp[4] - tmp[6]*tmp[6]*tmp[4];
1337   pdTheta[5]=tmp[5] - tmp[7]*tmp[7]*tmp[5];
1338   pdTheta[6]=tmp[6]*sqrt(tmp[4]/tmp[3]);
1339   pdTheta[7]=tmp[7]*sqrt(tmp[5]/tmp[3]);
1340   pdTheta[8]=(tmp[8] - tmp[6]*tmp[7])/(sqrt((1 - tmp[6]*tmp[6])*(1 - tmp[7]*tmp[7])));
1341   Free(tmp);
1342 }
1343 
1344 
1345 /**
1346  * Input transformed theta, loglikelihood, iteration
1347  * Mutates: history_full
1348  **/
setHistory(double * t_pdTheta,double loglik,int iter,setParam * setP,double history_full[][10])1349 void setHistory(double* t_pdTheta, double loglik, int iter,setParam* setP,double history_full[][10]) {
1350   int len=setP->param_len;
1351   int j;
1352   for(j=0;j<len;j++)
1353     history_full[iter][j]=t_pdTheta[j];
1354   if (iter>0) history_full[iter-1][len]=loglik;
1355 }
1356 
1357 /**
1358  * Determines whether we have converged
1359  * Takes in the current and old (one step previous) array of theta values
1360  * maxerr is the maximum difference two corresponding values can have before the
1361  *  function returns false
1362  */
closeEnough(double * pdTheta,double * pdTheta_old,int len,double maxerr)1363 int closeEnough(double* pdTheta, double* pdTheta_old, int len, double maxerr) {
1364   int j;
1365   for(j = 0; j<len; j++)
1366     if (fabs(pdTheta[j]-pdTheta_old[j])>=maxerr) return 0;
1367   return 1;
1368 }
1369 
1370 /**
1371  * Is the SEM process completely done.
1372  **/
semDoneCheck(setParam * setP)1373 int semDoneCheck(setParam* setP) {
1374   int varlen=0; int j;
1375   for(j=0; j<setP->param_len;j++)
1376     if(setP->varParam[j]) varlen++;
1377   for(j=0;j<varlen;j++)
1378     if(setP->semDone[j]==0) return 0;
1379   return 1;
1380 }
1381 
1382 /**
1383  * Older function. No longer used.
1384  **/
gridEStep(Param * params,int n_samp,int s_samp,int x1_samp,int x0_samp,double * suff,int verbose,double minW1,double maxW1)1385 void gridEStep(Param* params, int n_samp, int s_samp, int x1_samp, int x0_samp, double* suff, int verbose, double minW1, double maxW1) {
1386 
1387   int n_dim=2;
1388   int n_step=5000;    /* The default size of grid step */
1389   int ndraw=10000;
1390   int trapod=0;       /* 1 if use trapozodial ~= in numer. int.*/
1391   int *n_grid=intArray(n_samp);                /* grid size */
1392   double **W1g=doubleMatrix(n_samp, n_step);   /* grids for W1 */
1393   double **W2g=doubleMatrix(n_samp, n_step);   /* grids for W2 */
1394   double *vtemp=doubleArray(n_dim);
1395   int *mflag=intArray(n_step);
1396   double *prob_grid=doubleArray(n_step);
1397   double *prob_grid_cum=doubleArray(n_step);
1398   double **X=doubleMatrix(n_samp,n_dim);     /* Y and covariates */
1399 
1400   int itemp,i,j,k,t_samp;
1401   double dtemp,dtemp1,temp0,temp1;
1402 
1403   t_samp=n_samp+x1_samp+x0_samp+s_samp;
1404 
1405   double **W=doubleMatrix(t_samp,n_dim);     /* W1 and W2 matrix */
1406   double **Wstar=doubleMatrix(t_samp,5);     /* pseudo data(transformed*/
1407 
1408   for (i=0;i<t_samp;i++)
1409     for(j=0;j<n_dim;j++)
1410       X[i][j]=params[i].caseP.data[j];
1411 
1412   GridPrep(W1g, W2g, (double**) params[i].caseP.data, (double*)&maxW1, (double*)&minW1, n_grid, n_samp, n_step);
1413 
1414     for (i=0; i<n_step; i++) {
1415     mflag[i]=0;
1416   }
1417 
1418 
1419   //update W, Wstar given mu, Sigma in regular areas
1420   for (i=0;i<n_samp;i++){
1421     if ( params[i].caseP.Y!=0 && params[i].caseP.Y!=1 ) {
1422       // project BVN(mu, Sigma) on the inth tomo line
1423       dtemp=0;
1424       for (j=0;j<n_grid[i];j++){
1425         vtemp[0]=log(W1g[i][j])-log(1-W1g[i][j]);
1426         vtemp[1]=log(W2g[i][j])-log(1-W2g[i][j]);
1427         prob_grid[j]=dMVN(vtemp, params[i].caseP.mu, (double**)(params[i].setP->InvSigma), 2, 1) -
1428           log(W1g[i][j])-log(W2g[i][j])-log(1-W1g[i][j])-log(1-W2g[i][j]);
1429         prob_grid[j]=exp(prob_grid[j]);
1430         dtemp+=prob_grid[j];
1431         prob_grid_cum[j]=dtemp;
1432       }
1433       for (j=0;j<n_grid[i];j++){
1434         prob_grid_cum[j]/=dtemp; //standardize prob.grid
1435       }
1436       // MC numerical integration, compute E(W_i|Y_i, X_i, theta)
1437       //2 sample ndraw W_i on the ith tomo line
1438       //   use inverse CDF method to draw
1439       //   0-1 by 1/ndraw approx uniform distribution
1440       //3 compute Wsta_i from W_i
1441       j=0;
1442       itemp=1;
1443 
1444       for (k=0; k<ndraw; k++){
1445         j=findInterval(prob_grid_cum, n_grid[i],
1446 		      (double)(1+k)/(ndraw+1), 1, 1, itemp, mflag);
1447         itemp=j-1;
1448 
1449 
1450         if ((W1g[i][j]==0) || (W1g[i][j]==1))
1451           Rprintf("W1g%5d%5d%14g", i, j, W1g[i][j]);
1452         if ((W2g[i][j]==0) || (W2g[i][j]==1))
1453           Rprintf("W2g%5d%5d%14g", i, j, W2g[i][j]);
1454 
1455         if (j==0 || trapod==0) {
1456           W[i][0]=W1g[i][j];
1457           W[i][1]=W2g[i][j];
1458         }
1459         else if (j>=1 && trapod==1) {
1460           if (prob_grid_cum[j]!=prob_grid_cum[(j-1)]) {
1461             dtemp1=((double)(1+k)/(ndraw+1)-prob_grid_cum[(j-1)])/(prob_grid_cum[j]-prob_grid_cum[(j-1)]);
1462             W[i][0]=dtemp1*(W1g[i][j]-W1g[i][(j-1)])+W1g[i][(j-1)];
1463             W[i][1]=dtemp1*(W2g[i][j]-W2g[i][(j-1)])+W2g[i][(j-1)];
1464           }
1465           else if (prob_grid_cum[j]==prob_grid_cum[(j-1)]) {
1466             W[i][0]=W1g[i][j];
1467             W[i][1]=W2g[i][j];
1468           }
1469         }
1470         temp0=log(W[i][0])-log(1-W[i][0]);
1471         temp1=log(W[i][1])-log(1-W[i][1]);
1472         Wstar[i][0]+=temp0;
1473         Wstar[i][1]+=temp1;
1474         Wstar[i][2]+=temp0*temp0;
1475         Wstar[i][3]+=temp0*temp1;
1476         Wstar[i][4]+=temp1*temp1;
1477       }
1478     }
1479   }
1480 
1481   // compute E_{W_i|Y_i} for n_samp
1482   for (i=0; i<n_samp; i++) {
1483     if ( X[i][1]!=0 && X[i][1]!=1 ) {
1484       Wstar[i][0]/=ndraw;  //E(W1i)
1485       Wstar[i][1]/=ndraw;  //E(W2i)
1486       Wstar[i][2]/=ndraw;  //E(W1i^2)
1487       Wstar[i][3]/=ndraw;  //E(W1iW2i)
1488       Wstar[i][4]/=ndraw;  //E(W2i^2)
1489     }
1490   } //for x0type, x1type and survey data, E-step is either the observed value or the analytical expectation
1491 
1492   /* compute sufficient statistics */
1493   for (j=0; j<5; j++)
1494     suff[j]=0;
1495 
1496   for (i=0; i<t_samp; i++) {
1497     suff[0]+=Wstar[i][0];  /* sumE(W_i1|Y_i) */
1498     suff[1]+=Wstar[i][1];  /* sumE(W_i2|Y_i) */
1499     suff[2]+=Wstar[i][2];  /* sumE(W_i1^2|Y_i) */
1500     suff[3]+=Wstar[i][4];  /* sumE(W_i2^2|Y_i) */
1501     suff[4]+=Wstar[i][3];  /* sumE(W_i1^W_i2|Y_i) */
1502   }
1503 
1504 
1505   for(j=0; j<5; j++)
1506     suff[j]=suff[j]/t_samp;
1507 
1508   Free(n_grid);Free(vtemp);Free(mflag);Free(prob_grid);Free(prob_grid_cum);
1509   FreeMatrix(W1g,n_samp);FreeMatrix(W2g,n_samp);FreeMatrix(X,n_samp);
1510   FreeMatrix(W,t_samp);FreeMatrix(Wstar,t_samp);
1511 
1512 }
1513