1 /*
2  Genome-wide Efficient Mixed Model Association (GEMMA)
3  Copyright (C) 2011-2017, Xiang Zhou
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <fstream>
20 #include <iostream>
21 #include <sstream>
22 
23 #include <assert.h>
24 #include <bitset>
25 #include <cmath>
26 #include <cstring>
27 #include <iomanip>
28 #include <iostream>
29 #include <stdio.h>
30 #include <stdlib.h>
31 
32 #include "gsl/gsl_blas.h"
33 #include "gsl/gsl_cdf.h"
34 #include "gsl/gsl_linalg.h"
35 #include "gsl/gsl_matrix.h"
36 #include "gsl/gsl_min.h"
37 #include "gsl/gsl_roots.h"
38 #include "gsl/gsl_vector.h"
39 
40 #include "fastblas.h"
41 #include "gzstream.h"
42 #include "gemma_io.h"
43 #include "lapack.h"
44 #include "mathfunc.h"
45 #include "lmm.h"
46 #include "mvlmm.h"
47 
48 using namespace std;
49 
50 // In this file, X, Y are already transformed (i.e. UtX and UtY).
CopyFromParam(PARAM & cPar)51 void MVLMM::CopyFromParam(PARAM &cPar) {
52   a_mode = cPar.a_mode;
53   d_pace = cPar.d_pace;
54 
55   file_bfile = cPar.file_bfile;
56   file_geno = cPar.file_geno;
57   file_out = cPar.file_out;
58   path_out = cPar.path_out;
59 
60   l_min = cPar.l_min;
61   l_max = cPar.l_max;
62   n_region = cPar.n_region;
63   p_nr = cPar.p_nr;
64   em_iter = cPar.em_iter;
65   nr_iter = cPar.nr_iter;
66   em_prec = cPar.em_prec;
67   nr_prec = cPar.nr_prec;
68   crt = cPar.crt;
69 
70   Vg_remle_null = cPar.Vg_remle_null;
71   Ve_remle_null = cPar.Ve_remle_null;
72   Vg_mle_null = cPar.Vg_mle_null;
73   Ve_mle_null = cPar.Ve_mle_null;
74 
75   time_UtX = 0.0;
76   time_opt = 0.0;
77 
78   ni_total = cPar.ni_total;
79   ns_total = cPar.ns_total;
80   ni_test = cPar.ni_test;
81   ns_test = cPar.ns_test;
82   n_cvt = cPar.n_cvt;
83 
84   n_ph = cPar.n_ph;
85 
86   indicator_idv = cPar.indicator_idv;
87   indicator_snp = cPar.indicator_snp;
88   snpInfo = cPar.snpInfo;
89 
90   return;
91 }
92 
CopyToParam(PARAM & cPar)93 void MVLMM::CopyToParam(PARAM &cPar) {
94   cPar.time_UtX = time_UtX;
95   cPar.time_opt = time_opt;
96 
97   cPar.Vg_remle_null = Vg_remle_null;
98   cPar.Ve_remle_null = Ve_remle_null;
99   cPar.Vg_mle_null = Vg_mle_null;
100   cPar.Ve_mle_null = Ve_mle_null;
101 
102   cPar.VVg_remle_null = VVg_remle_null;
103   cPar.VVe_remle_null = VVe_remle_null;
104   cPar.VVg_mle_null = VVg_mle_null;
105   cPar.VVe_mle_null = VVe_mle_null;
106 
107   cPar.beta_remle_null = beta_remle_null;
108   cPar.se_beta_remle_null = se_beta_remle_null;
109   cPar.beta_mle_null = beta_mle_null;
110   cPar.se_beta_mle_null = se_beta_mle_null;
111 
112   cPar.logl_remle_H0 = logl_remle_H0;
113   cPar.logl_mle_H0 = logl_mle_H0;
114   return;
115 }
116 
WriteFiles()117 void MVLMM::WriteFiles() {
118   string file_str;
119   file_str = path_out + "/" + file_out;
120   file_str += ".assoc.txt";
121 
122   ofstream outfile(file_str.c_str(), ofstream::out);
123   if (!outfile) {
124     cout << "error writing file: " << file_str.c_str() << endl;
125     return;
126   }
127 
128   outfile << "chr"
129           << "\t"
130           << "rs"
131           << "\t"
132           << "ps"
133           << "\t"
134           << "n_miss"
135           << "\t"
136           << "allele1"
137           << "\t"
138           << "allele0"
139           << "\t"
140           << "af"
141           << "\t";
142 
143   for (size_t i = 0; i < n_ph; i++) {
144     outfile << "beta_" << i + 1 << "\t";
145   }
146   for (size_t i = 0; i < n_ph; i++) {
147     for (size_t j = i; j < n_ph; j++) {
148       outfile << "Vbeta_" << i + 1 << "_" << j + 1 << "\t";
149     }
150   }
151 
152   if (a_mode == 1) {
153     outfile << "p_wald" << endl;
154   } else if (a_mode == 2) {
155     outfile << "p_lrt" << endl;
156   } else if (a_mode == 3) {
157     outfile << "p_score" << endl;
158   } else if (a_mode == 4) {
159     outfile << "p_wald"
160             << "\t"
161             << "p_lrt"
162             << "\t"
163             << "p_score" << endl;
164   } else {
165   }
166 
167   size_t t = 0, c = 0;
168   for (size_t i = 0; i < snpInfo.size(); ++i) {
169     if (indicator_snp[i] == 0) {
170       continue;
171     }
172 
173     outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
174             << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
175             << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed
176             << setprecision(3) << snpInfo[i].maf << "\t";
177 
178     outfile << scientific << setprecision(6);
179 
180     for (size_t i = 0; i < n_ph; i++) {
181       outfile << sumStat[t].v_beta[i] << "\t";
182     }
183 
184     c = 0;
185     for (size_t i = 0; i < n_ph; i++) {
186       for (size_t j = i; j < n_ph; j++) {
187         outfile << sumStat[t].v_Vbeta[c] << "\t";
188         c++;
189       }
190     }
191 
192     if (a_mode == 1) {
193       outfile << sumStat[t].p_wald << endl;
194     } else if (a_mode == 2) {
195       outfile << sumStat[t].p_lrt << endl;
196     } else if (a_mode == 3) {
197       outfile << sumStat[t].p_score << endl;
198     } else if (a_mode == 4) {
199       outfile << sumStat[t].p_wald << "\t" << sumStat[t].p_lrt << "\t"
200               << sumStat[t].p_score << endl;
201     } else {
202     }
203 
204     t++;
205   }
206 
207   outfile.close();
208   outfile.clear();
209   return;
210 }
211 
212 // Below are functions for EM algorithm.
EigenProc(const gsl_matrix * V_g,const gsl_matrix * V_e,gsl_vector * D_l,gsl_matrix * UltVeh,gsl_matrix * UltVehi)213 double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
214                  gsl_matrix *UltVeh, gsl_matrix *UltVehi) {
215   size_t d_size = V_g->size1;
216   double d, logdet_Ve = 0.0;
217 
218   // Eigen decomposition of V_e.
219   gsl_matrix *Lambda = gsl_matrix_alloc(d_size, d_size);
220   gsl_matrix *V_e_temp = gsl_matrix_alloc(d_size, d_size);
221   gsl_matrix *V_e_h = gsl_matrix_alloc(d_size, d_size);
222   gsl_matrix *V_e_hi = gsl_matrix_alloc(d_size, d_size);
223   gsl_matrix *VgVehi = gsl_matrix_alloc(d_size, d_size);
224   gsl_matrix *U_l = gsl_matrix_alloc(d_size, d_size);
225 
226   gsl_matrix_memcpy(V_e_temp, V_e);
227   EigenDecomp(V_e_temp, U_l, D_l, 0);
228 
229   // Calculate V_e_h and V_e_hi.
230   gsl_matrix_set_zero(V_e_h);
231   gsl_matrix_set_zero(V_e_hi);
232   for (size_t i = 0; i < d_size; i++) {
233     d = gsl_vector_get(D_l, i);
234     if (d <= 0) {
235       continue;
236     }
237     logdet_Ve += safe_log(d);
238 
239     gsl_vector_view U_col = gsl_matrix_column(U_l, i);
240     d = safe_sqrt(d);
241     gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_h);
242     d = 1.0 / d;
243     gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_hi);
244   }
245 
246   // Copy the upper part to lower part.
247   for (size_t i = 0; i < d_size; i++) {
248     for (size_t j = 0; j < i; j++) {
249       gsl_matrix_set(V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
250       gsl_matrix_set(V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
251     }
252   }
253 
254   // Calculate Lambda=V_ehi V_g V_ehi.
255   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi);
256   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
257 
258   // Eigen decomposition of Lambda.
259   EigenDecomp(Lambda, U_l, D_l, 0);
260 
261   for (size_t i = 0; i < d_size; i++) {
262     d = gsl_vector_get(D_l, i);
263     if (d < 0) {
264       gsl_vector_set(D_l, i, 0);
265     }
266   }
267 
268 
269   // Calculate UltVeh and UltVehi.
270   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh);
271   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi);
272 
273   // free memory
274   gsl_matrix_free(Lambda);
275   gsl_matrix_free(V_e_temp);
276   gsl_matrix_free(V_e_h);
277   gsl_matrix_free(V_e_hi);
278   gsl_matrix_free(VgVehi);
279   gsl_matrix_free(U_l);
280 
281   return logdet_Ve;
282 }
283 
284 // Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
CalcQi(const gsl_vector * eval,const gsl_vector * D_l,const gsl_matrix * X,gsl_matrix * Qi)285 double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
286               const gsl_matrix *X, gsl_matrix *Qi) {
287   size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
288   size_t c_size = dc_size / d_size;
289 
290   double delta, dl, d1, d2, d, logdet_Q;
291 
292   gsl_matrix *Q = gsl_matrix_alloc(dc_size, dc_size);
293   gsl_matrix_set_zero(Q);
294 
295   for (size_t i = 0; i < c_size; i++) {
296     for (size_t j = 0; j < c_size; j++) {
297       for (size_t l = 0; l < d_size; l++) {
298         dl = gsl_vector_get(D_l, l);
299 
300         if (j < i) {
301           d = gsl_matrix_get(Q, j * d_size + l, i * d_size + l);
302         } else {
303           d = 0.0;
304           for (size_t k = 0; k < n_size; k++) {
305             d1 = gsl_matrix_get(X, i, k);
306             d2 = gsl_matrix_get(X, j, k);
307             delta = gsl_vector_get(eval, k);
308             d += d1 * d2 / (dl * delta + 1.0); // @@
309           }
310         }
311 
312         gsl_matrix_set(Q, i * d_size + l, j * d_size + l, d);
313       }
314     }
315   }
316 
317   // Calculate LU decomposition of Q, and invert Q and calculate |Q|.
318   int sig;
319   gsl_permutation *pmt = gsl_permutation_alloc(dc_size);
320   LUDecomp(Q, pmt, &sig);
321   LUInvert(Q, pmt, Qi);
322 
323   logdet_Q = LULndet(Q);
324 
325   gsl_matrix_free(Q);
326   gsl_permutation_free(pmt);
327 
328   return logdet_Q;
329 }
330 
331 // xHiy=\sum_{k=1}^n x_k\otimes ((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y.
332 //
333 // FIXME: mvlmm spends a massive amount of time here
CalcXHiY(const gsl_vector * eval,const gsl_vector * D_l,const gsl_matrix * X,const gsl_matrix * UltVehiY,gsl_vector * xHiy)334 void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
335               const gsl_matrix *X, const gsl_matrix *UltVehiY,
336               gsl_vector *xHiy) {
337   // debug_msg("enter");
338   size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size;
339 
340   // gsl_vector_set_zero(xHiy);
341 
342   double x, delta, dl, y, d;
343   for (size_t i = 0; i < d_size; i++) {
344     dl = gsl_vector_get(D_l, i);
345     for (size_t j = 0; j < c_size; j++) {
346       d = 0.0;
347       for (size_t k = 0; k < n_size; k++) {
348         x = gsl_matrix_get(X, j, k);
349         y = gsl_matrix_get(UltVehiY, i, k);
350         delta = gsl_vector_get(eval, k);
351         d += x * y / (delta * dl + 1.0);
352       }
353       gsl_vector_set(xHiy, j * d_size + i, d);
354     }
355   }
356   // debug_msg("exit");
357 
358   return;
359 }
360 
361 // OmegaU=D_l/(delta Dl+I)^{-1}
362 // OmegaE=delta D_l/(delta Dl+I)^{-1}
CalcOmega(const gsl_vector * eval,const gsl_vector * D_l,gsl_matrix * OmegaU,gsl_matrix * OmegaE)363 void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
364                gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
365   size_t n_size = eval->size, d_size = D_l->size;
366   double delta, dl, d_u, d_e;
367 
368   for (size_t k = 0; k < n_size; k++) {
369     delta = gsl_vector_get(eval, k);
370     for (size_t i = 0; i < d_size; i++) {
371       dl = gsl_vector_get(D_l, i);
372 
373       d_u = dl / (delta * dl + 1.0);  // @@
374       d_e = delta * d_u;
375 
376       gsl_matrix_set(OmegaU, i, k, d_u);
377       gsl_matrix_set(OmegaE, i, k, d_e);
378     }
379   }
380 
381   return;
382 }
383 
UpdateU(const gsl_matrix * OmegaE,const gsl_matrix * UltVehiY,const gsl_matrix * UltVehiBX,gsl_matrix * UltVehiU)384 void UpdateU(const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY,
385              const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
386   gsl_matrix_memcpy(UltVehiU, UltVehiY);
387   gsl_matrix_sub(UltVehiU, UltVehiBX);
388 
389   gsl_matrix_mul_elements(UltVehiU, OmegaE);
390   return;
391 }
392 
UpdateE(const gsl_matrix * UltVehiY,const gsl_matrix * UltVehiBX,const gsl_matrix * UltVehiU,gsl_matrix * UltVehiE)393 void UpdateE(const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX,
394              const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
395   gsl_matrix_memcpy(UltVehiE, UltVehiY);
396   gsl_matrix_sub(UltVehiE, UltVehiBX);
397   gsl_matrix_sub(UltVehiE, UltVehiU);
398 
399   return;
400 }
401 
UpdateL_B(const gsl_matrix * X,const gsl_matrix * XXti,const gsl_matrix * UltVehiY,const gsl_matrix * UltVehiU,gsl_matrix * UltVehiBX,gsl_matrix * UltVehiB)402 void UpdateL_B(const gsl_matrix *X, const gsl_matrix *XXti,
403                const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU,
404                gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
405   size_t c_size = X->size1, d_size = UltVehiY->size1;
406 
407   gsl_matrix *YUX = gsl_matrix_alloc(d_size, c_size);
408 
409   gsl_matrix_memcpy(UltVehiBX, UltVehiY);
410   gsl_matrix_sub(UltVehiBX, UltVehiU);
411 
412   gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX);
413   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB);
414 
415   gsl_matrix_free(YUX);
416 
417   return;
418 }
419 
UpdateRL_B(const gsl_vector * xHiy,const gsl_matrix * Qi,gsl_matrix * UltVehiB)420 void UpdateRL_B(const gsl_vector *xHiy, const gsl_matrix *Qi,
421                 gsl_matrix *UltVehiB) {
422   size_t d_size = UltVehiB->size1, c_size = UltVehiB->size2,
423          dc_size = Qi->size1;
424 
425   gsl_vector *b = gsl_vector_alloc(dc_size);
426 
427   // Calculate b=Qiv.
428   gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
429 
430   // Copy b to UltVehiB.
431   for (size_t i = 0; i < c_size; i++) {
432     gsl_vector_view UltVehiB_col = gsl_matrix_column(UltVehiB, i);
433     gsl_vector_const_view b_subcol =
434         gsl_vector_const_subvector(b, i * d_size, d_size);
435     gsl_vector_memcpy(&UltVehiB_col.vector, &b_subcol.vector);
436   }
437 
438   gsl_vector_free(b);
439 
440   return;
441 }
442 
UpdateV(const gsl_vector * eval,const gsl_matrix * U,const gsl_matrix * E,const gsl_matrix * Sigma_uu,const gsl_matrix * Sigma_ee,gsl_matrix * V_g,gsl_matrix * V_e)443 void UpdateV(const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E,
444              const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee,
445              gsl_matrix *V_g, gsl_matrix *V_e) {
446   size_t n_size = eval->size, d_size = U->size1;
447 
448   gsl_matrix_set_zero(V_g);
449   gsl_matrix_set_zero(V_e);
450 
451   double delta;
452 
453   // Calculate the first part: UD^{-1}U^T and EE^T.
454   for (size_t k = 0; k < n_size; k++) {
455     delta = gsl_vector_get(eval, k);
456     if (delta == 0) {
457       continue;
458     }
459 
460     gsl_vector_const_view U_col = gsl_matrix_const_column(U, k);
461     gsl_blas_dsyr(CblasUpper, 1.0 / delta, &U_col.vector, V_g);
462   }
463 
464   gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
465 
466   // Copy the upper part to lower part.
467   for (size_t i = 0; i < d_size; i++) {
468     for (size_t j = 0; j < i; j++) {
469       gsl_matrix_set(V_g, i, j, gsl_matrix_get(V_g, j, i));
470       gsl_matrix_set(V_e, i, j, gsl_matrix_get(V_e, j, i));
471     }
472   }
473 
474   // Add Sigma.
475   gsl_matrix_add(V_g, Sigma_uu);
476   gsl_matrix_add(V_e, Sigma_ee);
477 
478   // Scale by 1/n.
479   gsl_matrix_scale(V_g, 1.0 / (double)n_size);
480   gsl_matrix_scale(V_e, 1.0 / (double)n_size);
481 
482   return;
483 }
484 
CalcSigma(const char func_name,const gsl_vector * eval,const gsl_vector * D_l,const gsl_matrix * X,const gsl_matrix * OmegaU,const gsl_matrix * OmegaE,const gsl_matrix * UltVeh,const gsl_matrix * Qi,gsl_matrix * Sigma_uu,gsl_matrix * Sigma_ee)485 void CalcSigma(const char func_name, const gsl_vector *eval,
486                const gsl_vector *D_l, const gsl_matrix *X,
487                const gsl_matrix *OmegaU, const gsl_matrix *OmegaE,
488                const gsl_matrix *UltVeh, const gsl_matrix *Qi,
489                gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
490   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
491       func_name != 'l') {
492     cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
493          << "likelihood, 'L' for log-likelihood." << endl;
494     return;
495   }
496 
497   size_t n_size = eval->size, c_size = X->size1;
498   size_t d_size = D_l->size, dc_size = Qi->size1;
499 
500   gsl_matrix_set_zero(Sigma_uu);
501   gsl_matrix_set_zero(Sigma_ee);
502 
503   double delta, dl, x, d;
504 
505   // Calculate the first diagonal term.
506   gsl_vector_view Suu_diag = gsl_matrix_diagonal(Sigma_uu);
507   gsl_vector_view See_diag = gsl_matrix_diagonal(Sigma_ee);
508 
509   for (size_t k = 0; k < n_size; k++) {
510     gsl_vector_const_view OmegaU_col = gsl_matrix_const_column(OmegaU, k);
511     gsl_vector_const_view OmegaE_col = gsl_matrix_const_column(OmegaE, k);
512 
513     gsl_vector_add(&Suu_diag.vector, &OmegaU_col.vector);
514     gsl_vector_add(&See_diag.vector, &OmegaE_col.vector);
515   }
516 
517   // Calculate the second term for REML.
518   if (func_name == 'R' || func_name == 'r') {
519     gsl_matrix *M_u = gsl_matrix_alloc(dc_size, d_size);
520     gsl_matrix *M_e = gsl_matrix_alloc(dc_size, d_size);
521     gsl_matrix *QiM = gsl_matrix_alloc(dc_size, d_size);
522 
523     gsl_matrix_set_zero(M_u);
524     gsl_matrix_set_zero(M_e);
525 
526     for (size_t k = 0; k < n_size; k++) {
527       delta = gsl_vector_get(eval, k);
528 
529       for (size_t i = 0; i < d_size; i++) {
530         dl = gsl_vector_get(D_l, i);
531         for (size_t j = 0; j < c_size; j++) {
532           x = gsl_matrix_get(X, j, k);
533           d = x / (delta * dl + 1.0);
534           gsl_matrix_set(M_e, j * d_size + i, i, d);
535           gsl_matrix_set(M_u, j * d_size + i, i, d * dl);
536         }
537       }
538       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM);
539       gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu);
540 
541       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM);
542       gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee);
543     }
544 
545     gsl_matrix_free(M_u);
546     gsl_matrix_free(M_e);
547     gsl_matrix_free(QiM);
548   }
549 
550   // Multiply both sides by VehUl.
551   gsl_matrix *M = gsl_matrix_alloc(d_size, d_size);
552 
553   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M);
554   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu);
555   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M);
556   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee);
557 
558   gsl_matrix_free(M);
559   return;
560 }
561 
562 // 'R' for restricted likelihood and 'L' for likelihood.
563 // 'R' update B and 'L' don't.
564 // only calculate -0.5*\sum_{k=1}^n|H_k|-0.5yPxy.
MphCalcLogL(const gsl_vector * eval,const gsl_vector * xHiy,const gsl_vector * D_l,const gsl_matrix * UltVehiY,const gsl_matrix * Qi)565 double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy,
566                    const gsl_vector *D_l, const gsl_matrix *UltVehiY,
567                    const gsl_matrix *Qi) {
568   size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
569   double logl = 0.0, delta, dl, y, d;
570 
571   // Calculate yHiy+log|H_k|.
572   for (size_t k = 0; k < n_size; k++) {
573     delta = gsl_vector_get(eval, k);
574     for (size_t i = 0; i < d_size; i++) {
575       y = gsl_matrix_get(UltVehiY, i, k);
576       dl = gsl_vector_get(D_l, i);
577       d = delta * dl + 1.0;
578 
579       logl += y * y / d + safe_log(d);
580     }
581   }
582 
583   // Calculate the rest of yPxy.
584   gsl_vector *Qiv = gsl_vector_alloc(dc_size);
585 
586   gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
587   gsl_blas_ddot(xHiy, Qiv, &d);
588 
589   logl -= d;
590 
591   gsl_vector_free(Qiv);
592 
593   return -0.5 * logl;
594 }
595 
596 // Y is a dxn matrix, X is a cxn matrix, B is a dxc matrix, V_g is a
597 // dxd matrix, V_e is a dxd matrix, eval is a size n vector
598 //'R' for restricted likelihood and 'L' for likelihood.
MphEM(const char func_name,const size_t max_iter,const double max_prec,const gsl_vector * eval,const gsl_matrix * X,const gsl_matrix * Y,gsl_matrix * U_hat,gsl_matrix * E_hat,gsl_matrix * OmegaU,gsl_matrix * OmegaE,gsl_matrix * UltVehiY,gsl_matrix * UltVehiBX,gsl_matrix * UltVehiU,gsl_matrix * UltVehiE,gsl_matrix * V_g,gsl_matrix * V_e,gsl_matrix * B)599 double MphEM(const char func_name, const size_t max_iter, const double max_prec,
600              const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
601              gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU,
602              gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX,
603              gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g,
604              gsl_matrix *V_e, gsl_matrix *B) {
605   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
606       func_name != 'l') {
607     cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
608          << "likelihood, 'L' for log-likelihood." << endl;
609     return 0.0;
610   }
611 
612   size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
613   size_t dc_size = d_size * c_size;
614 
615   gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
616   gsl_matrix *XXti = gsl_matrix_alloc(c_size, c_size);
617   gsl_vector *D_l = gsl_vector_alloc(d_size);
618   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
619   gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
620   gsl_matrix *UltVehiB = gsl_matrix_alloc(d_size, c_size);
621   gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
622   gsl_matrix *Sigma_uu = gsl_matrix_alloc(d_size, d_size);
623   gsl_matrix *Sigma_ee = gsl_matrix_alloc(d_size, d_size);
624   gsl_vector *xHiy = gsl_vector_alloc(dc_size);
625   gsl_permutation *pmt = gsl_permutation_alloc(c_size);
626 
627   double logl_const = 0.0, logl_old = 0.0, logl_new = 0.0;
628   double logdet_Q, logdet_Ve;
629   int sig;
630 
631   // Calculate |XXt| and (XXt)^{-1}.
632   gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
633   for (size_t i = 0; i < c_size; ++i) {
634     for (size_t j = 0; j < i; ++j) {
635       gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
636     }
637   }
638 
639   LUDecomp(XXt, pmt, &sig);
640   LUInvert(XXt, pmt, XXti);
641 
642   // Calculate the constant for logl.
643   if (func_name == 'R' || func_name == 'r') {
644     logl_const =
645         -0.5 * (double)(n_size - c_size) * (double)d_size * safe_log(2.0 * M_PI) +
646         0.5 * (double)d_size * LULndet(XXt);
647   } else {
648     logl_const = -0.5 * (double)n_size * (double)d_size * safe_log(2.0 * M_PI);
649   }
650 
651   // Start EM.
652   for (size_t t = 0; t < max_iter; t++) {
653     logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
654 
655     logdet_Q = CalcQi(eval, D_l, X, Qi);
656 
657     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
658     CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
659 
660     // Calculate log likelihood/restricted likelihood value, and
661     // terminate if change is small.
662     logl_new = logl_const + MphCalcLogL(eval, xHiy, D_l, UltVehiY, Qi) -
663                0.5 * (double)n_size * logdet_Ve;
664     if (func_name == 'R' || func_name == 'r') {
665       logl_new += -0.5 * (logdet_Q - (double)c_size * logdet_Ve);
666     }
667     if (t != 0 && abs(logl_new - logl_old) < max_prec) {
668       break;
669     }
670     logl_old = logl_new;
671 
672     CalcOmega(eval, D_l, OmegaU, OmegaE);
673 
674     // Update UltVehiB, UltVehiU.
675     if (func_name == 'R' || func_name == 'r') {
676       UpdateRL_B(xHiy, Qi, UltVehiB);
677       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
678                      UltVehiBX);
679     } else if (t == 0) {
680       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0,
681                      UltVehiB);
682       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
683                      UltVehiBX);
684     }
685 
686     UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
687 
688     if (func_name == 'L' || func_name == 'l') {
689 
690       // UltVehiBX is destroyed here.
691       UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
692       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
693                      UltVehiBX);
694     }
695 
696     UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
697 
698     // Calculate U_hat, E_hat and B.
699     gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat);
700     gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat);
701     gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiB, 0.0, B);
702 
703     // Calculate Sigma_uu and Sigma_ee.
704     CalcSigma(func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu,
705               Sigma_ee);
706 
707     // Update V_g and V_e.
708     UpdateV(eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
709   }
710 
711   gsl_matrix_free(XXt);
712   gsl_matrix_free(XXti);
713   gsl_vector_free(D_l);
714   gsl_matrix_free(UltVeh);
715   gsl_matrix_free(UltVehi);
716   gsl_matrix_free(UltVehiB);
717   gsl_matrix_free(Qi);
718   gsl_matrix_free(Sigma_uu);
719   gsl_matrix_free(Sigma_ee);
720   gsl_vector_free(xHiy);
721   gsl_permutation_free(pmt);
722 
723   return logl_new;
724 }
725 
726 // Calculate p-value, beta (d by 1 vector) and V(beta).
MphCalcP(const gsl_vector * eval,const gsl_vector * x_vec,const gsl_matrix * W,const gsl_matrix * Y,const gsl_matrix * V_g,const gsl_matrix * V_e,gsl_matrix * UltVehiY,gsl_vector * beta,gsl_matrix * Vbeta)727 double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
728                 const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g,
729                 const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta,
730                 gsl_matrix *Vbeta) {
731   size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
732   size_t dc_size = d_size * c_size;
733   double delta, dl, d, d1, d2, dy, dx, dw; //  logdet_Ve, logdet_Q, p_value;
734 
735   gsl_vector *D_l = gsl_vector_alloc(d_size);
736   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
737   gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
738   gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
739   gsl_matrix *WHix = gsl_matrix_alloc(dc_size, d_size);
740   gsl_matrix *QiWHix = gsl_matrix_alloc(dc_size, d_size);
741 
742   gsl_matrix *xPx = gsl_matrix_alloc(d_size, d_size);
743   gsl_vector *xPy = gsl_vector_alloc(d_size);
744   gsl_vector *WHiy = gsl_vector_alloc(dc_size);
745 
746   gsl_matrix_set_zero(xPx);
747   gsl_matrix_set_zero(WHix);
748   gsl_vector_set_zero(xPy);
749   gsl_vector_set_zero(WHiy);
750 
751   // Eigen decomposition and calculate log|Ve|.
752   // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
753   EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
754 
755   // Calculate Qi and log|Q|.
756   // double logdet_Q = CalcQi(eval, D_l, W, Qi);
757   CalcQi(eval, D_l, W, Qi);
758 
759   // Calculate UltVehiY.
760   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
761 
762   // Calculate WHix, WHiy, xHiy, xHix.
763   for (size_t i = 0; i < d_size; i++) {
764     dl = gsl_vector_get(D_l, i);
765 
766     d1 = 0.0;
767     d2 = 0.0;
768     for (size_t k = 0; k < n_size; k++) {
769       delta = gsl_vector_get(eval, k);
770       dx = gsl_vector_get(x_vec, k);
771       dy = gsl_matrix_get(UltVehiY, i, k);
772 
773       d1 += dx * dy / (delta * dl + 1.0);
774       d2 += dx * dx / (delta * dl + 1.0);
775     }
776     gsl_vector_set(xPy, i, d1);
777     gsl_matrix_set(xPx, i, i, d2);
778 
779     for (size_t j = 0; j < c_size; j++) {
780       d1 = 0.0;
781       d2 = 0.0;
782       for (size_t k = 0; k < n_size; k++) {
783         delta = gsl_vector_get(eval, k);
784         dx = gsl_vector_get(x_vec, k);
785         dw = gsl_matrix_get(W, j, k);
786         dy = gsl_matrix_get(UltVehiY, i, k);
787 
788         d1 += dx * dw / (delta * dl + 1.0);
789         d2 += dy * dw / (delta * dl + 1.0);
790       }
791       gsl_matrix_set(WHix, j * d_size + i, i, d1);
792       gsl_vector_set(WHiy, j * d_size + i, d2);
793     }
794   }
795 
796   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
797   gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
798   gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
799 
800   // Calculate V(beta) and beta.
801   int sig;
802   gsl_permutation *pmt = gsl_permutation_alloc(d_size);
803   LUDecomp(xPx, pmt, &sig);
804   LUSolve(xPx, pmt, xPy, D_l);
805   LUInvert(xPx, pmt, Vbeta);
806 
807   // Need to multiply UltVehi on both sides or one side.
808   gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
809   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx);
810   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
811 
812   // Calculate test statistic and p value.
813   gsl_blas_ddot(D_l, xPy, &d);
814 
815   double p_value = gsl_cdf_chisq_Q(d, (double)d_size);
816 
817   gsl_vector_free(D_l);
818   gsl_matrix_free(UltVeh);
819   gsl_matrix_free(UltVehi);
820   gsl_matrix_free(Qi);
821   gsl_matrix_free(WHix);
822   gsl_matrix_free(QiWHix);
823 
824   gsl_matrix_free(xPx);
825   gsl_vector_free(xPy);
826   gsl_vector_free(WHiy);
827 
828   gsl_permutation_free(pmt);
829 
830   return p_value;
831 }
832 
833 // Calculate B and its standard error (which is a matrix of the same
834 // dimension as B).
MphCalcBeta(const gsl_vector * eval,const gsl_matrix * W,const gsl_matrix * Y,const gsl_matrix * V_g,const gsl_matrix * V_e,gsl_matrix * UltVehiY,gsl_matrix * B,gsl_matrix * se_B)835 void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
836                  const gsl_matrix *Y, const gsl_matrix *V_g,
837                  const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B,
838                  gsl_matrix *se_B) {
839   size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
840   size_t dc_size = d_size * c_size;
841   double delta, dl, d, dy, dw; // , logdet_Ve, logdet_Q;
842 
843   gsl_vector *D_l = gsl_vector_alloc(d_size);
844   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
845   gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
846   gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
847   gsl_matrix *Qi_temp = gsl_matrix_alloc(dc_size, dc_size);
848   gsl_vector *WHiy = gsl_vector_alloc(dc_size);
849   gsl_vector *QiWHiy = gsl_vector_alloc(dc_size);
850   gsl_vector *beta = gsl_vector_alloc(dc_size);
851   gsl_matrix *Vbeta = gsl_matrix_alloc(dc_size, dc_size);
852 
853   gsl_vector_set_zero(WHiy);
854 
855   // Eigen decomposition and calculate log|Ve|.
856   // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
857   EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
858 
859   // Calculate Qi and log|Q|.
860   // double logdet_Q = CalcQi(eval, D_l, W, Qi);
861   CalcQi(eval, D_l, W, Qi);
862 
863   // Calculate UltVehiY.
864   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
865 
866   // Calculate WHiy.
867   for (size_t i = 0; i < d_size; i++) {
868     dl = gsl_vector_get(D_l, i);
869 
870     for (size_t j = 0; j < c_size; j++) {
871       d = 0.0;
872       for (size_t k = 0; k < n_size; k++) {
873         delta = gsl_vector_get(eval, k);
874         dw = gsl_matrix_get(W, j, k);
875         dy = gsl_matrix_get(UltVehiY, i, k);
876 
877         d += dy * dw / (delta * dl + 1.0);
878       }
879       gsl_vector_set(WHiy, j * d_size + i, d);
880     }
881   }
882 
883   gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
884 
885   // Need to multiply I_c\otimes UltVehi on both sides or one side.
886   for (size_t i = 0; i < c_size; i++) {
887     gsl_vector_view QiWHiy_sub =
888         gsl_vector_subvector(QiWHiy, i * d_size, d_size);
889     gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
890     gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0,
891                    &beta_sub.vector);
892 
893     for (size_t j = 0; j < c_size; j++) {
894       gsl_matrix_view Qi_sub =
895           gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
896       gsl_matrix_view Qitemp_sub =
897           gsl_matrix_submatrix(Qi_temp, i * d_size, j * d_size, d_size, d_size);
898       gsl_matrix_view Vbeta_sub =
899           gsl_matrix_submatrix(Vbeta, i * d_size, j * d_size, d_size, d_size);
900 
901       if (j < i) {
902         gsl_matrix_view Vbeta_sym =
903             gsl_matrix_submatrix(Vbeta, j * d_size, i * d_size, d_size, d_size);
904         gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix, &Vbeta_sym.matrix);
905       } else {
906         gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
907                        0.0, &Qitemp_sub.matrix);
908         gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
909                        &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
910       }
911     }
912   }
913 
914   // Copy beta to B, and Vbeta to se_B.
915   for (size_t j = 0; j < B->size2; j++) {
916     for (size_t i = 0; i < B->size1; i++) {
917       gsl_matrix_set(B, i, j, gsl_vector_get(beta, j * d_size + i));
918       gsl_matrix_set(se_B, i, j, safe_sqrt(gsl_matrix_get(Vbeta, j * d_size + i,
919                                                      j * d_size + i)));
920     }
921   }
922 
923   // Free matrices.
924   gsl_vector_free(D_l);
925   gsl_matrix_free(UltVeh);
926   gsl_matrix_free(UltVehi);
927   gsl_matrix_free(Qi);
928   gsl_matrix_free(Qi_temp);
929   gsl_vector_free(WHiy);
930   gsl_vector_free(QiWHiy);
931   gsl_vector_free(beta);
932   gsl_matrix_free(Vbeta);
933 
934   return;
935 }
936 
937 // Below are functions for Newton-Raphson's algorithm.
938 
939 // Calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k|
940 // and calculate Qi and return logdet_Q
941 // and calculate yPy.
CalcHiQi(const gsl_vector * eval,const gsl_matrix * X,const gsl_matrix * V_g,const gsl_matrix * V_e,gsl_matrix * Hi_all,gsl_matrix * Qi,double & logdet_H,double & logdet_Q)942 void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
943               const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all,
944               gsl_matrix *Qi, double &logdet_H, double &logdet_Q) {
945   gsl_matrix_set_zero(Hi_all);
946   gsl_matrix_set_zero(Qi);
947   logdet_H = 0.0;
948   logdet_Q = 0.0;
949 
950   size_t n_size = eval->size, c_size = X->size1, d_size = V_g->size1;
951   double logdet_Ve = 0.0, delta, dl, d;
952 
953   gsl_matrix *mat_dd = gsl_matrix_alloc(d_size, d_size);
954   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
955   gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
956   gsl_vector *D_l = gsl_vector_alloc(d_size);
957 
958   // Calculate D_l, UltVeh and UltVehi.
959   logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
960 
961   // Calculate each Hi and log|H_k|.
962   logdet_H = (double)n_size * logdet_Ve;
963   for (size_t k = 0; k < n_size; k++) {
964     delta = gsl_vector_get(eval, k);
965 
966     gsl_matrix_memcpy(mat_dd, UltVehi);
967     for (size_t i = 0; i < d_size; i++) {
968       dl = gsl_vector_get(D_l, i);
969       d = delta * dl + 1.0;
970 
971       gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
972       gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@
973 
974       logdet_H += safe_log(d);
975     }
976 
977     gsl_matrix_view Hi_k =
978         gsl_matrix_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
979     gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0,
980                    &Hi_k.matrix);
981   }
982 
983   // Calculate Qi, and multiply I\o times UtVeh on both side and
984   // calculate logdet_Q, don't forget to substract
985   // c_size*logdet_Ve.
986   logdet_Q = CalcQi(eval, D_l, X, Qi) - (double)c_size * logdet_Ve;
987 
988   for (size_t i = 0; i < c_size; i++) {
989     for (size_t j = 0; j < c_size; j++) {
990       gsl_matrix_view Qi_sub =
991           gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
992       if (j < i) {
993         gsl_matrix_view Qi_sym =
994             gsl_matrix_submatrix(Qi, j * d_size, i * d_size, d_size, d_size);
995         gsl_matrix_transpose_memcpy(&Qi_sub.matrix, &Qi_sym.matrix);
996       } else {
997         gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
998                        0.0, mat_dd);
999         gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, mat_dd, 0.0,
1000                        &Qi_sub.matrix);
1001       }
1002     }
1003   }
1004 
1005   // Free memory.
1006   gsl_matrix_free(mat_dd);
1007   gsl_matrix_free(UltVeh);
1008   gsl_matrix_free(UltVehi);
1009   gsl_vector_free(D_l);
1010 
1011   return;
1012 }
1013 
1014 // Calculate all Hiy.
Calc_Hiy_all(const gsl_matrix * Y,const gsl_matrix * Hi_all,gsl_matrix * Hiy_all)1015 void Calc_Hiy_all(const gsl_matrix *Y, const gsl_matrix *Hi_all,
1016                   gsl_matrix *Hiy_all) {
1017   gsl_matrix_set_zero(Hiy_all);
1018 
1019   size_t n_size = Y->size2, d_size = Y->size1;
1020 
1021   for (size_t k = 0; k < n_size; k++) {
1022     gsl_matrix_const_view Hi_k =
1023         gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
1024     gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
1025     gsl_vector_view Hiy_k = gsl_matrix_column(Hiy_all, k);
1026 
1027     gsl_blas_dgemv(CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0,
1028                    &Hiy_k.vector);
1029   }
1030 
1031   return;
1032 }
1033 
1034 // Calculate all xHi.
Calc_xHi_all(const gsl_matrix * X,const gsl_matrix * Hi_all,gsl_matrix * xHi_all)1035 void Calc_xHi_all(const gsl_matrix *X, const gsl_matrix *Hi_all,
1036                   gsl_matrix *xHi_all) {
1037   gsl_matrix_set_zero(xHi_all);
1038 
1039   size_t n_size = X->size2, c_size = X->size1, d_size = Hi_all->size1;
1040 
1041   double d;
1042 
1043   for (size_t k = 0; k < n_size; k++) {
1044     gsl_matrix_const_view Hi_k =
1045         gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
1046 
1047     for (size_t i = 0; i < c_size; i++) {
1048       d = gsl_matrix_get(X, i, k);
1049       gsl_matrix_view xHi_sub =
1050           gsl_matrix_submatrix(xHi_all, i * d_size, k * d_size, d_size, d_size);
1051       gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
1052       gsl_matrix_scale(&xHi_sub.matrix, d);
1053     }
1054   }
1055 
1056   return;
1057 }
1058 
1059 // Calculate scalar yHiy.
Calc_yHiy(const gsl_matrix * Y,const gsl_matrix * Hiy_all)1060 double Calc_yHiy(const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
1061   double yHiy = 0.0, d;
1062   size_t n_size = Y->size2;
1063 
1064   for (size_t k = 0; k < n_size; k++) {
1065     gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
1066     gsl_vector_const_view Hiy_k = gsl_matrix_const_column(Hiy_all, k);
1067 
1068     gsl_blas_ddot(&Hiy_k.vector, &y_k.vector, &d);
1069     yHiy += d;
1070   }
1071 
1072   return yHiy;
1073 }
1074 
1075 // Calculate the vector xHiy.
Calc_xHiy(const gsl_matrix * Y,const gsl_matrix * xHi,gsl_vector * xHiy)1076 void Calc_xHiy(const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
1077   gsl_vector_set_zero(xHiy);
1078 
1079   size_t n_size = Y->size2, d_size = Y->size1, dc_size = xHi->size1;
1080 
1081   for (size_t k = 0; k < n_size; k++) {
1082     gsl_matrix_const_view xHi_k =
1083         gsl_matrix_const_submatrix(xHi, 0, k * d_size, dc_size, d_size);
1084     gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
1085 
1086     gsl_blas_dgemv(CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy);
1087   }
1088 
1089   return;
1090 }
1091 
1092 // 0<=i,j<d_size
GetIndex(const size_t i,const size_t j,const size_t d_size)1093 size_t GetIndex(const size_t i, const size_t j, const size_t d_size) {
1094   if (i >= d_size || j >= d_size) {
1095     cout << "error in GetIndex." << endl;
1096     return 0;
1097   }
1098 
1099   size_t s, l;
1100   if (j < i) {
1101     s = j;
1102     l = i;
1103   } else {
1104     s = i;
1105     l = j;
1106   }
1107 
1108   return (2 * d_size - s + 1) * s / 2 + l - s;
1109 }
1110 
Calc_yHiDHiy(const gsl_vector * eval,const gsl_matrix * Hiy,const size_t i,const size_t j,double & yHiDHiy_g,double & yHiDHiy_e)1111 void Calc_yHiDHiy(const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i,
1112                   const size_t j, double &yHiDHiy_g, double &yHiDHiy_e) {
1113   yHiDHiy_g = 0.0;
1114   yHiDHiy_e = 0.0;
1115 
1116   size_t n_size = eval->size;
1117 
1118   double delta, d1, d2;
1119 
1120   for (size_t k = 0; k < n_size; k++) {
1121     delta = gsl_vector_get(eval, k);
1122     d1 = gsl_matrix_get(Hiy, i, k);
1123     d2 = gsl_matrix_get(Hiy, j, k);
1124 
1125     if (i == j) {
1126       yHiDHiy_g += delta * d1 * d2;
1127       yHiDHiy_e += d1 * d2;
1128     } else {
1129       yHiDHiy_g += delta * d1 * d2 * 2.0;
1130       yHiDHiy_e += d1 * d2 * 2.0;
1131     }
1132   }
1133 
1134   return;
1135 }
1136 
Calc_xHiDHiy(const gsl_vector * eval,const gsl_matrix * xHi,const gsl_matrix * Hiy,const size_t i,const size_t j,gsl_vector * xHiDHiy_g,gsl_vector * xHiDHiy_e)1137 void Calc_xHiDHiy(const gsl_vector *eval, const gsl_matrix *xHi,
1138                   const gsl_matrix *Hiy, const size_t i, const size_t j,
1139                   gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
1140   gsl_vector_set_zero(xHiDHiy_g);
1141   gsl_vector_set_zero(xHiDHiy_e);
1142 
1143   size_t n_size = eval->size, d_size = Hiy->size1;
1144 
1145   double delta, d;
1146 
1147   for (size_t k = 0; k < n_size; k++) {
1148     delta = gsl_vector_get(eval, k);
1149 
1150     gsl_vector_const_view xHi_col_i =
1151         gsl_matrix_const_column(xHi, k * d_size + i);
1152     d = gsl_matrix_get(Hiy, j, k);
1153 
1154     gsl_blas_daxpy(d * delta, &xHi_col_i.vector, xHiDHiy_g);
1155     gsl_blas_daxpy(d, &xHi_col_i.vector, xHiDHiy_e);
1156 
1157     if (i != j) {
1158       gsl_vector_const_view xHi_col_j =
1159           gsl_matrix_const_column(xHi, k * d_size + j);
1160       d = gsl_matrix_get(Hiy, i, k);
1161 
1162       gsl_blas_daxpy(d * delta, &xHi_col_j.vector, xHiDHiy_g);
1163       gsl_blas_daxpy(d, &xHi_col_j.vector, xHiDHiy_e);
1164     }
1165   }
1166 
1167   return;
1168 }
1169 
Calc_xHiDHix(const gsl_vector * eval,const gsl_matrix * xHi,const size_t i,const size_t j,gsl_matrix * xHiDHix_g,gsl_matrix * xHiDHix_e)1170 void Calc_xHiDHix(const gsl_vector *eval, const gsl_matrix *xHi, const size_t i,
1171                   const size_t j, gsl_matrix *xHiDHix_g,
1172                   gsl_matrix *xHiDHix_e) {
1173   gsl_matrix_set_zero(xHiDHix_g);
1174   gsl_matrix_set_zero(xHiDHix_e);
1175 
1176   size_t n_size = eval->size, dc_size = xHi->size1;
1177   size_t d_size = xHi->size2 / n_size;
1178 
1179   double delta;
1180 
1181   gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
1182   gsl_matrix *mat_dcdc_t = gsl_matrix_alloc(dc_size, dc_size);
1183 
1184   for (size_t k = 0; k < n_size; k++) {
1185     delta = gsl_vector_get(eval, k);
1186 
1187     gsl_vector_const_view xHi_col_i =
1188         gsl_matrix_const_column(xHi, k * d_size + i);
1189     gsl_vector_const_view xHi_col_j =
1190         gsl_matrix_const_column(xHi, k * d_size + j);
1191 
1192     gsl_matrix_set_zero(mat_dcdc);
1193     gsl_blas_dger(1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
1194 
1195     gsl_matrix_transpose_memcpy(mat_dcdc_t, mat_dcdc);
1196 
1197     gsl_matrix_add(xHiDHix_e, mat_dcdc);
1198 
1199     gsl_matrix_scale(mat_dcdc, delta);
1200     gsl_matrix_add(xHiDHix_g, mat_dcdc);
1201 
1202     if (i != j) {
1203       gsl_matrix_add(xHiDHix_e, mat_dcdc_t);
1204 
1205       gsl_matrix_scale(mat_dcdc_t, delta);
1206       gsl_matrix_add(xHiDHix_g, mat_dcdc_t);
1207     }
1208   }
1209 
1210   gsl_matrix_free(mat_dcdc);
1211   gsl_matrix_free(mat_dcdc_t);
1212 
1213   return;
1214 }
1215 
Calc_yHiDHiDHiy(const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * Hiy,const size_t i1,const size_t j1,const size_t i2,const size_t j2,double & yHiDHiDHiy_gg,double & yHiDHiDHiy_ee,double & yHiDHiDHiy_ge)1216 void Calc_yHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
1217                      const gsl_matrix *Hiy, const size_t i1, const size_t j1,
1218                      const size_t i2, const size_t j2, double &yHiDHiDHiy_gg,
1219                      double &yHiDHiDHiy_ee, double &yHiDHiDHiy_ge) {
1220   yHiDHiDHiy_gg = 0.0;
1221   yHiDHiDHiy_ee = 0.0;
1222   yHiDHiDHiy_ge = 0.0;
1223 
1224   size_t n_size = eval->size, d_size = Hiy->size1;
1225 
1226   double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
1227   double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
1228 
1229   for (size_t k = 0; k < n_size; k++) {
1230     delta = gsl_vector_get(eval, k);
1231 
1232     d_Hiy_i1 = gsl_matrix_get(Hiy, i1, k);
1233     d_Hiy_j1 = gsl_matrix_get(Hiy, j1, k);
1234     d_Hiy_i2 = gsl_matrix_get(Hiy, i2, k);
1235     d_Hiy_j2 = gsl_matrix_get(Hiy, j2, k);
1236 
1237     d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
1238     d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
1239     d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
1240     d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
1241 
1242     if (i1 == j1) {
1243       yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
1244       yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
1245       yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
1246 
1247       if (i2 != j2) {
1248         yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
1249         yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
1250         yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
1251       }
1252     } else {
1253       yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
1254                                         d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
1255       yHiDHiDHiy_ee +=
1256           (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
1257       yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
1258                                 d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
1259 
1260       if (i2 != j2) {
1261         yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
1262                                           d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
1263         yHiDHiDHiy_ee +=
1264             (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
1265         yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
1266                                   d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
1267       }
1268     }
1269   }
1270 
1271   return;
1272 }
1273 
Calc_xHiDHiDHiy(const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * xHi,const gsl_matrix * Hiy,const size_t i1,const size_t j1,const size_t i2,const size_t j2,gsl_vector * xHiDHiDHiy_gg,gsl_vector * xHiDHiDHiy_ee,gsl_vector * xHiDHiDHiy_ge)1274 void Calc_xHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
1275                      const gsl_matrix *xHi, const gsl_matrix *Hiy,
1276                      const size_t i1, const size_t j1, const size_t i2,
1277                      const size_t j2, gsl_vector *xHiDHiDHiy_gg,
1278                      gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
1279   gsl_vector_set_zero(xHiDHiDHiy_gg);
1280   gsl_vector_set_zero(xHiDHiDHiy_ee);
1281   gsl_vector_set_zero(xHiDHiDHiy_ge);
1282 
1283   size_t n_size = eval->size, d_size = Hiy->size1;
1284 
1285   double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
1286   double d_Hi_j1i2, d_Hi_j1j2;
1287 
1288   for (size_t k = 0; k < n_size; k++) {
1289     delta = gsl_vector_get(eval, k);
1290 
1291     gsl_vector_const_view xHi_col_i =
1292         gsl_matrix_const_column(xHi, k * d_size + i1);
1293     gsl_vector_const_view xHi_col_j =
1294         gsl_matrix_const_column(xHi, k * d_size + j1);
1295 
1296     d_Hiy_i = gsl_matrix_get(Hiy, i2, k);
1297     d_Hiy_j = gsl_matrix_get(Hiy, j2, k);
1298 
1299     d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
1300     d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
1301     d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
1302     d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
1303 
1304     if (i1 == j1) {
1305       gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
1306                      xHiDHiDHiy_gg);
1307       gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
1308       gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
1309                      xHiDHiDHiy_ge);
1310 
1311       if (i2 != j2) {
1312         gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
1313                        xHiDHiDHiy_gg);
1314         gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
1315         gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
1316                        xHiDHiDHiy_ge);
1317       }
1318     } else {
1319       gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
1320                      xHiDHiDHiy_gg);
1321       gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
1322       gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
1323                      xHiDHiDHiy_ge);
1324 
1325       gsl_blas_daxpy(delta * delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
1326                      xHiDHiDHiy_gg);
1327       gsl_blas_daxpy(d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee);
1328       gsl_blas_daxpy(delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
1329                      xHiDHiDHiy_ge);
1330 
1331       if (i2 != j2) {
1332         gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
1333                        xHiDHiDHiy_gg);
1334         gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
1335         gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
1336                        xHiDHiDHiy_ge);
1337 
1338         gsl_blas_daxpy(delta * delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
1339                        xHiDHiDHiy_gg);
1340         gsl_blas_daxpy(d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee);
1341         gsl_blas_daxpy(delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
1342                        xHiDHiDHiy_ge);
1343       }
1344     }
1345   }
1346 
1347   return;
1348 }
1349 
Calc_xHiDHiDHix(const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * xHi,const size_t i1,const size_t j1,const size_t i2,const size_t j2,gsl_matrix * xHiDHiDHix_gg,gsl_matrix * xHiDHiDHix_ee,gsl_matrix * xHiDHiDHix_ge)1350 void Calc_xHiDHiDHix(const gsl_vector *eval, const gsl_matrix *Hi,
1351                      const gsl_matrix *xHi, const size_t i1, const size_t j1,
1352                      const size_t i2, const size_t j2,
1353                      gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
1354                      gsl_matrix *xHiDHiDHix_ge) {
1355   gsl_matrix_set_zero(xHiDHiDHix_gg);
1356   gsl_matrix_set_zero(xHiDHiDHix_ee);
1357   gsl_matrix_set_zero(xHiDHiDHix_ge);
1358 
1359   size_t n_size = eval->size, d_size = Hi->size1, dc_size = xHi->size1;
1360 
1361   double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
1362 
1363   gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
1364 
1365   for (size_t k = 0; k < n_size; k++) {
1366     delta = gsl_vector_get(eval, k);
1367 
1368     gsl_vector_const_view xHi_col_i1 =
1369         gsl_matrix_const_column(xHi, k * d_size + i1);
1370     gsl_vector_const_view xHi_col_j1 =
1371         gsl_matrix_const_column(xHi, k * d_size + j1);
1372     gsl_vector_const_view xHi_col_i2 =
1373         gsl_matrix_const_column(xHi, k * d_size + i2);
1374     gsl_vector_const_view xHi_col_j2 =
1375         gsl_matrix_const_column(xHi, k * d_size + j2);
1376 
1377     d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
1378     d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
1379     d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
1380     d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
1381 
1382     if (i1 == j1) {
1383       gsl_matrix_set_zero(mat_dcdc);
1384       gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
1385                     mat_dcdc);
1386 
1387       gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1388       gsl_matrix_scale(mat_dcdc, delta);
1389       gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1390       gsl_matrix_scale(mat_dcdc, delta);
1391       gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1392 
1393       if (i2 != j2) {
1394         gsl_matrix_set_zero(mat_dcdc);
1395         gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
1396                       mat_dcdc);
1397 
1398         gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1399         gsl_matrix_scale(mat_dcdc, delta);
1400         gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1401         gsl_matrix_scale(mat_dcdc, delta);
1402         gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1403       }
1404     } else {
1405       gsl_matrix_set_zero(mat_dcdc);
1406       gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
1407                     mat_dcdc);
1408 
1409       gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1410       gsl_matrix_scale(mat_dcdc, delta);
1411       gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1412       gsl_matrix_scale(mat_dcdc, delta);
1413       gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1414 
1415       gsl_matrix_set_zero(mat_dcdc);
1416       gsl_blas_dger(d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector,
1417                     mat_dcdc);
1418 
1419       gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1420       gsl_matrix_scale(mat_dcdc, delta);
1421       gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1422       gsl_matrix_scale(mat_dcdc, delta);
1423       gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1424 
1425       if (i2 != j2) {
1426         gsl_matrix_set_zero(mat_dcdc);
1427         gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
1428                       mat_dcdc);
1429 
1430         gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1431         gsl_matrix_scale(mat_dcdc, delta);
1432         gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1433         gsl_matrix_scale(mat_dcdc, delta);
1434         gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1435 
1436         gsl_matrix_set_zero(mat_dcdc);
1437         gsl_blas_dger(d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector,
1438                       mat_dcdc);
1439 
1440         gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
1441         gsl_matrix_scale(mat_dcdc, delta);
1442         gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
1443         gsl_matrix_scale(mat_dcdc, delta);
1444         gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
1445       }
1446     }
1447   }
1448 
1449   gsl_matrix_free(mat_dcdc);
1450 
1451   return;
1452 }
1453 
Calc_traceHiD(const gsl_vector * eval,const gsl_matrix * Hi,const size_t i,const size_t j,double & tHiD_g,double & tHiD_e)1454 void Calc_traceHiD(const gsl_vector *eval, const gsl_matrix *Hi, const size_t i,
1455                    const size_t j, double &tHiD_g, double &tHiD_e) {
1456   tHiD_g = 0.0;
1457   tHiD_e = 0.0;
1458 
1459   size_t n_size = eval->size, d_size = Hi->size1;
1460   double delta, d;
1461 
1462   for (size_t k = 0; k < n_size; k++) {
1463     delta = gsl_vector_get(eval, k);
1464     d = gsl_matrix_get(Hi, j, k * d_size + i);
1465 
1466     if (i == j) {
1467       tHiD_g += delta * d;
1468       tHiD_e += d;
1469     } else {
1470       tHiD_g += delta * d * 2.0;
1471       tHiD_e += d * 2.0;
1472     }
1473   }
1474 
1475   return;
1476 }
1477 
Calc_traceHiDHiD(const gsl_vector * eval,const gsl_matrix * Hi,const size_t i1,const size_t j1,const size_t i2,const size_t j2,double & tHiDHiD_gg,double & tHiDHiD_ee,double & tHiDHiD_ge)1478 void Calc_traceHiDHiD(const gsl_vector *eval, const gsl_matrix *Hi,
1479                       const size_t i1, const size_t j1, const size_t i2,
1480                       const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
1481                       double &tHiDHiD_ge) {
1482   tHiDHiD_gg = 0.0;
1483   tHiDHiD_ee = 0.0;
1484   tHiDHiD_ge = 0.0;
1485 
1486   size_t n_size = eval->size, d_size = Hi->size1;
1487   double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
1488 
1489   for (size_t k = 0; k < n_size; k++) {
1490     delta = gsl_vector_get(eval, k);
1491 
1492     d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
1493     d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
1494     d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
1495     d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
1496 
1497     if (i1 == j1) {
1498       tHiDHiD_gg += delta * delta * d_Hi_i1j2 * d_Hi_j1i2;
1499       tHiDHiD_ee += d_Hi_i1j2 * d_Hi_j1i2;
1500       tHiDHiD_ge += delta * d_Hi_i1j2 * d_Hi_j1i2;
1501 
1502       if (i2 != j2) {
1503         tHiDHiD_gg += delta * delta * d_Hi_i1i2 * d_Hi_j1j2;
1504         tHiDHiD_ee += d_Hi_i1i2 * d_Hi_j1j2;
1505         tHiDHiD_ge += delta * d_Hi_i1i2 * d_Hi_j1j2;
1506       }
1507     } else {
1508       tHiDHiD_gg +=
1509           delta * delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
1510       tHiDHiD_ee += (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
1511       tHiDHiD_ge += delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
1512 
1513       if (i2 != j2) {
1514         tHiDHiD_gg +=
1515             delta * delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
1516         tHiDHiD_ee += (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
1517         tHiDHiD_ge += delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
1518       }
1519     }
1520   }
1521 
1522   return;
1523 }
1524 
1525 // trace(PD) = trace((Hi-HixQixHi)D)=trace(HiD) - trace(HixQixHiD)
Calc_tracePD(const gsl_vector * eval,const gsl_matrix * Qi,const gsl_matrix * Hi,const gsl_matrix * xHiDHix_all_g,const gsl_matrix * xHiDHix_all_e,const size_t i,const size_t j,double & tPD_g,double & tPD_e)1526 void Calc_tracePD(const gsl_vector *eval, const gsl_matrix *Qi,
1527                   const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
1528                   const gsl_matrix *xHiDHix_all_e, const size_t i,
1529                   const size_t j, double &tPD_g, double &tPD_e) {
1530   size_t dc_size = Qi->size1, d_size = Hi->size1;
1531   size_t v = GetIndex(i, j, d_size);
1532 
1533   double d;
1534 
1535   // Calculate the first part: trace(HiD).
1536   Calc_traceHiD(eval, Hi, i, j, tPD_g, tPD_e);
1537 
1538   // Calculate the second part: -trace(HixQixHiD).
1539   for (size_t k = 0; k < dc_size; k++) {
1540     gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, k);
1541     gsl_vector_const_view xHiDHix_g_col =
1542         gsl_matrix_const_column(xHiDHix_all_g, v * dc_size + k);
1543     gsl_vector_const_view xHiDHix_e_col =
1544         gsl_matrix_const_column(xHiDHix_all_e, v * dc_size + k);
1545 
1546     gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
1547     tPD_g -= d;
1548     gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
1549     tPD_e -= d;
1550   }
1551 
1552   return;
1553 }
1554 
1555 // trace(PDPD) = trace((Hi-HixQixHi)D(Hi-HixQixHi)D)
1556 //             = trace(HiDHiD) - trace(HixQixHiDHiD)
1557 //               - trace(HiDHixQixHiD) + trace(HixQixHiDHixQixHiD)
Calc_tracePDPD(const gsl_vector * eval,const gsl_matrix * Qi,const gsl_matrix * Hi,const gsl_matrix * xHi,const gsl_matrix * QixHiDHix_all_g,const gsl_matrix * QixHiDHix_all_e,const gsl_matrix * xHiDHiDHix_all_gg,const gsl_matrix * xHiDHiDHix_all_ee,const gsl_matrix * xHiDHiDHix_all_ge,const size_t i1,const size_t j1,const size_t i2,const size_t j2,double & tPDPD_gg,double & tPDPD_ee,double & tPDPD_ge)1558 void Calc_tracePDPD(const gsl_vector *eval, const gsl_matrix *Qi,
1559                     const gsl_matrix *Hi, const gsl_matrix *xHi,
1560                     const gsl_matrix *QixHiDHix_all_g,
1561                     const gsl_matrix *QixHiDHix_all_e,
1562                     const gsl_matrix *xHiDHiDHix_all_gg,
1563                     const gsl_matrix *xHiDHiDHix_all_ee,
1564                     const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1,
1565                     const size_t j1, const size_t i2, const size_t j2,
1566                     double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge) {
1567   size_t dc_size = Qi->size1, d_size = Hi->size1;
1568   size_t v_size = d_size * (d_size + 1) / 2;
1569   size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
1570 
1571   double d;
1572 
1573   // Calculate the first part: trace(HiDHiD).
1574   Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
1575 
1576   // Calculate the second and third parts:
1577   // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
1578   for (size_t i = 0; i < dc_size; i++) {
1579     gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, i);
1580     gsl_vector_const_view xHiDHiDHix_gg_col = gsl_matrix_const_column(
1581         xHiDHiDHix_all_gg, (v1 * v_size + v2) * dc_size + i);
1582     gsl_vector_const_view xHiDHiDHix_ee_col = gsl_matrix_const_column(
1583         xHiDHiDHix_all_ee, (v1 * v_size + v2) * dc_size + i);
1584     gsl_vector_const_view xHiDHiDHix_ge_col = gsl_matrix_const_column(
1585         xHiDHiDHix_all_ge, (v1 * v_size + v2) * dc_size + i);
1586 
1587     gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
1588     tPDPD_gg -= d * 2.0;
1589     gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
1590     tPDPD_ee -= d * 2.0;
1591     gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
1592     tPDPD_ge -= d * 2.0;
1593   }
1594 
1595   // Calculate the fourth part: trace(HixQixHiDHixQixHiD).
1596   for (size_t i = 0; i < dc_size; i++) {
1597 
1598     gsl_vector_const_view QixHiDHix_g_fullrow1 =
1599         gsl_matrix_const_row(QixHiDHix_all_g, i);
1600     gsl_vector_const_view QixHiDHix_e_fullrow1 =
1601         gsl_matrix_const_row(QixHiDHix_all_e, i);
1602     gsl_vector_const_view QixHiDHix_g_row1 = gsl_vector_const_subvector(
1603         &QixHiDHix_g_fullrow1.vector, v1 * dc_size, dc_size);
1604     gsl_vector_const_view QixHiDHix_e_row1 = gsl_vector_const_subvector(
1605         &QixHiDHix_e_fullrow1.vector, v1 * dc_size, dc_size);
1606 
1607     gsl_vector_const_view QixHiDHix_g_col2 =
1608         gsl_matrix_const_column(QixHiDHix_all_g, v2 * dc_size + i);
1609     gsl_vector_const_view QixHiDHix_e_col2 =
1610         gsl_matrix_const_column(QixHiDHix_all_e, v2 * dc_size + i);
1611 
1612     gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_g_col2.vector, &d);
1613     tPDPD_gg += d;
1614     gsl_blas_ddot(&QixHiDHix_e_row1.vector, &QixHiDHix_e_col2.vector, &d);
1615     tPDPD_ee += d;
1616     gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d);
1617     tPDPD_ge += d;
1618   }
1619 
1620   return;
1621 }
1622 
1623 // Calculate (xHiDHiy) for every pair (i,j).
Calc_xHiDHiy_all(const gsl_vector * eval,const gsl_matrix * xHi,const gsl_matrix * Hiy,gsl_matrix * xHiDHiy_all_g,gsl_matrix * xHiDHiy_all_e)1624 void Calc_xHiDHiy_all(const gsl_vector *eval, const gsl_matrix *xHi,
1625                       const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
1626                       gsl_matrix *xHiDHiy_all_e) {
1627   gsl_matrix_set_zero(xHiDHiy_all_g);
1628   gsl_matrix_set_zero(xHiDHiy_all_e);
1629 
1630   size_t d_size = Hiy->size1;
1631   size_t v;
1632 
1633   for (size_t i = 0; i < d_size; i++) {
1634     for (size_t j = 0; j < d_size; j++) {
1635       if (j < i) {
1636         continue;
1637       }
1638       v = GetIndex(i, j, d_size);
1639 
1640       gsl_vector_view xHiDHiy_g = gsl_matrix_column(xHiDHiy_all_g, v);
1641       gsl_vector_view xHiDHiy_e = gsl_matrix_column(xHiDHiy_all_e, v);
1642 
1643       Calc_xHiDHiy(eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector);
1644     }
1645   }
1646   return;
1647 }
1648 
1649 // Calculate (xHiDHix) for every pair (i,j).
Calc_xHiDHix_all(const gsl_vector * eval,const gsl_matrix * xHi,gsl_matrix * xHiDHix_all_g,gsl_matrix * xHiDHix_all_e)1650 void Calc_xHiDHix_all(const gsl_vector *eval, const gsl_matrix *xHi,
1651                       gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
1652   gsl_matrix_set_zero(xHiDHix_all_g);
1653   gsl_matrix_set_zero(xHiDHix_all_e);
1654 
1655   size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
1656   size_t v;
1657 
1658   for (size_t i = 0; i < d_size; i++) {
1659     for (size_t j = 0; j < d_size; j++) {
1660       if (j < i) {
1661         continue;
1662       }
1663       v = GetIndex(i, j, d_size);
1664 
1665       gsl_matrix_view xHiDHix_g =
1666           gsl_matrix_submatrix(xHiDHix_all_g, 0, v * dc_size, dc_size, dc_size);
1667       gsl_matrix_view xHiDHix_e =
1668           gsl_matrix_submatrix(xHiDHix_all_e, 0, v * dc_size, dc_size, dc_size);
1669 
1670       Calc_xHiDHix(eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
1671     }
1672   }
1673   return;
1674 }
1675 
1676 // Calculate (xHiDHiy) for every pair (i,j).
Calc_xHiDHiDHiy_all(const size_t v_size,const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * xHi,const gsl_matrix * Hiy,gsl_matrix * xHiDHiDHiy_all_gg,gsl_matrix * xHiDHiDHiy_all_ee,gsl_matrix * xHiDHiDHiy_all_ge)1677 void Calc_xHiDHiDHiy_all(const size_t v_size, const gsl_vector *eval,
1678                          const gsl_matrix *Hi, const gsl_matrix *xHi,
1679                          const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
1680                          gsl_matrix *xHiDHiDHiy_all_ee,
1681                          gsl_matrix *xHiDHiDHiy_all_ge) {
1682   gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
1683   gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
1684   gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
1685 
1686   size_t d_size = Hiy->size1;
1687   size_t v1, v2;
1688 
1689   for (size_t i1 = 0; i1 < d_size; i1++) {
1690     for (size_t j1 = 0; j1 < d_size; j1++) {
1691       if (j1 < i1) {
1692         continue;
1693       }
1694       v1 = GetIndex(i1, j1, d_size);
1695 
1696       for (size_t i2 = 0; i2 < d_size; i2++) {
1697         for (size_t j2 = 0; j2 < d_size; j2++) {
1698           if (j2 < i2) {
1699             continue;
1700           }
1701           v2 = GetIndex(i2, j2, d_size);
1702 
1703           gsl_vector_view xHiDHiDHiy_gg =
1704               gsl_matrix_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
1705           gsl_vector_view xHiDHiDHiy_ee =
1706               gsl_matrix_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
1707           gsl_vector_view xHiDHiDHiy_ge =
1708               gsl_matrix_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
1709 
1710           Calc_xHiDHiDHiy(eval, Hi, xHi, Hiy, i1, j1, i2, j2,
1711                           &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector,
1712                           &xHiDHiDHiy_ge.vector);
1713         }
1714       }
1715     }
1716   }
1717   return;
1718 }
1719 
1720 // Calculate (xHiDHix) for every pair (i,j).
Calc_xHiDHiDHix_all(const size_t v_size,const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * xHi,gsl_matrix * xHiDHiDHix_all_gg,gsl_matrix * xHiDHiDHix_all_ee,gsl_matrix * xHiDHiDHix_all_ge)1721 void Calc_xHiDHiDHix_all(const size_t v_size, const gsl_vector *eval,
1722                          const gsl_matrix *Hi, const gsl_matrix *xHi,
1723                          gsl_matrix *xHiDHiDHix_all_gg,
1724                          gsl_matrix *xHiDHiDHix_all_ee,
1725                          gsl_matrix *xHiDHiDHix_all_ge) {
1726   gsl_matrix_set_zero(xHiDHiDHix_all_gg);
1727   gsl_matrix_set_zero(xHiDHiDHix_all_ee);
1728   gsl_matrix_set_zero(xHiDHiDHix_all_ge);
1729 
1730   size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
1731   size_t v1, v2;
1732 
1733   for (size_t i1 = 0; i1 < d_size; i1++) {
1734     for (size_t j1 = 0; j1 < d_size; j1++) {
1735       if (j1 < i1) {
1736         continue;
1737       }
1738       v1 = GetIndex(i1, j1, d_size);
1739 
1740       for (size_t i2 = 0; i2 < d_size; i2++) {
1741         for (size_t j2 = 0; j2 < d_size; j2++) {
1742           if (j2 < i2) {
1743             continue;
1744           }
1745           v2 = GetIndex(i2, j2, d_size);
1746 
1747           if (v2 < v1) {
1748             continue;
1749           }
1750 
1751           gsl_matrix_view xHiDHiDHix_gg1 = gsl_matrix_submatrix(
1752               xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size,
1753               dc_size);
1754           gsl_matrix_view xHiDHiDHix_ee1 = gsl_matrix_submatrix(
1755               xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size,
1756               dc_size);
1757           gsl_matrix_view xHiDHiDHix_ge1 = gsl_matrix_submatrix(
1758               xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size,
1759               dc_size);
1760 
1761           Calc_xHiDHiDHix(eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix,
1762                           &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix);
1763 
1764           if (v2 != v1) {
1765             gsl_matrix_view xHiDHiDHix_gg2 = gsl_matrix_submatrix(
1766                 xHiDHiDHix_all_gg, 0, (v2 * v_size + v1) * dc_size, dc_size,
1767                 dc_size);
1768             gsl_matrix_view xHiDHiDHix_ee2 = gsl_matrix_submatrix(
1769                 xHiDHiDHix_all_ee, 0, (v2 * v_size + v1) * dc_size, dc_size,
1770                 dc_size);
1771             gsl_matrix_view xHiDHiDHix_ge2 = gsl_matrix_submatrix(
1772                 xHiDHiDHix_all_ge, 0, (v2 * v_size + v1) * dc_size, dc_size,
1773                 dc_size);
1774 
1775             gsl_matrix_memcpy(&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix);
1776             gsl_matrix_memcpy(&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix);
1777             gsl_matrix_memcpy(&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix);
1778           }
1779         }
1780       }
1781     }
1782   }
1783 
1784   return;
1785 }
1786 
1787 // Calculate (xHiDHix)Qi(xHiy) for every pair (i,j).
Calc_xHiDHixQixHiy_all(const gsl_matrix * xHiDHix_all_g,const gsl_matrix * xHiDHix_all_e,const gsl_vector * QixHiy,gsl_matrix * xHiDHixQixHiy_all_g,gsl_matrix * xHiDHixQixHiy_all_e)1788 void Calc_xHiDHixQixHiy_all(const gsl_matrix *xHiDHix_all_g,
1789                             const gsl_matrix *xHiDHix_all_e,
1790                             const gsl_vector *QixHiy,
1791                             gsl_matrix *xHiDHixQixHiy_all_g,
1792                             gsl_matrix *xHiDHixQixHiy_all_e) {
1793   size_t dc_size = xHiDHix_all_g->size1;
1794   size_t v_size = xHiDHix_all_g->size2 / dc_size;
1795 
1796   for (size_t i = 0; i < v_size; i++) {
1797     gsl_matrix_const_view xHiDHix_g = gsl_matrix_const_submatrix(
1798         xHiDHix_all_g, 0, i * dc_size, dc_size, dc_size);
1799     gsl_matrix_const_view xHiDHix_e = gsl_matrix_const_submatrix(
1800         xHiDHix_all_e, 0, i * dc_size, dc_size, dc_size);
1801 
1802     gsl_vector_view xHiDHixQixHiy_g = gsl_matrix_column(xHiDHixQixHiy_all_g, i);
1803     gsl_vector_view xHiDHixQixHiy_e = gsl_matrix_column(xHiDHixQixHiy_all_e, i);
1804 
1805     gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_g.matrix, QixHiy, 0.0,
1806                    &xHiDHixQixHiy_g.vector);
1807     gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0,
1808                    &xHiDHixQixHiy_e.vector);
1809   }
1810 
1811   return;
1812 }
1813 
1814 // Calculate Qi(xHiDHiy) and Qi(xHiDHix)Qi(xHiy) for each pair of i,j (i<=j).
Calc_QiVec_all(const gsl_matrix * Qi,const gsl_matrix * vec_all_g,const gsl_matrix * vec_all_e,gsl_matrix * Qivec_all_g,gsl_matrix * Qivec_all_e)1815 void Calc_QiVec_all(const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
1816                     const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
1817                     gsl_matrix *Qivec_all_e) {
1818   for (size_t i = 0; i < vec_all_g->size2; i++) {
1819     gsl_vector_const_view vec_g = gsl_matrix_const_column(vec_all_g, i);
1820     gsl_vector_const_view vec_e = gsl_matrix_const_column(vec_all_e, i);
1821 
1822     gsl_vector_view Qivec_g = gsl_matrix_column(Qivec_all_g, i);
1823     gsl_vector_view Qivec_e = gsl_matrix_column(Qivec_all_e, i);
1824 
1825     gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector);
1826     gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector);
1827   }
1828 
1829   return;
1830 }
1831 
1832 // Calculate Qi(xHiDHix) for each pair of i,j (i<=j).
Calc_QiMat_all(const gsl_matrix * Qi,const gsl_matrix * mat_all_g,const gsl_matrix * mat_all_e,gsl_matrix * Qimat_all_g,gsl_matrix * Qimat_all_e)1833 void Calc_QiMat_all(const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
1834                     const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
1835                     gsl_matrix *Qimat_all_e) {
1836   size_t dc_size = Qi->size1;
1837   size_t v_size = mat_all_g->size2 / mat_all_g->size1;
1838 
1839   for (size_t i = 0; i < v_size; i++) {
1840     gsl_matrix_const_view mat_g =
1841         gsl_matrix_const_submatrix(mat_all_g, 0, i * dc_size, dc_size, dc_size);
1842     gsl_matrix_const_view mat_e =
1843         gsl_matrix_const_submatrix(mat_all_e, 0, i * dc_size, dc_size, dc_size);
1844 
1845     gsl_matrix_view Qimat_g =
1846         gsl_matrix_submatrix(Qimat_all_g, 0, i * dc_size, dc_size, dc_size);
1847     gsl_matrix_view Qimat_e =
1848         gsl_matrix_submatrix(Qimat_all_e, 0, i * dc_size, dc_size, dc_size);
1849 
1850     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0,
1851                    &Qimat_g.matrix);
1852     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0,
1853                    &Qimat_e.matrix);
1854   }
1855 
1856   return;
1857 }
1858 
1859 // Calculate yPDPy
1860 // yPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)y
1861 //       = ytHiDHiy - (yHix)Qi(xHiDHiy) - (yHiDHix)Qi(xHiy)
1862 //         + (yHix)Qi(xHiDHix)Qi(xtHiy)
Calc_yPDPy(const gsl_vector * eval,const gsl_matrix * Hiy,const gsl_vector * QixHiy,const gsl_matrix * xHiDHiy_all_g,const gsl_matrix * xHiDHiy_all_e,const gsl_matrix * xHiDHixQixHiy_all_g,const gsl_matrix * xHiDHixQixHiy_all_e,const size_t i,const size_t j,double & yPDPy_g,double & yPDPy_e)1863 void Calc_yPDPy(const gsl_vector *eval, const gsl_matrix *Hiy,
1864                 const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
1865                 const gsl_matrix *xHiDHiy_all_e,
1866                 const gsl_matrix *xHiDHixQixHiy_all_g,
1867                 const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i,
1868                 const size_t j, double &yPDPy_g, double &yPDPy_e) {
1869   size_t d_size = Hiy->size1;
1870   size_t v = GetIndex(i, j, d_size);
1871 
1872   double d;
1873 
1874   // First part: ytHiDHiy.
1875   Calc_yHiDHiy(eval, Hiy, i, j, yPDPy_g, yPDPy_e);
1876 
1877   // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
1878   gsl_vector_const_view xHiDHiy_g = gsl_matrix_const_column(xHiDHiy_all_g, v);
1879   gsl_vector_const_view xHiDHiy_e = gsl_matrix_const_column(xHiDHiy_all_e, v);
1880 
1881   gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
1882   yPDPy_g -= d * 2.0;
1883   gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
1884   yPDPy_e -= d * 2.0;
1885 
1886   // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
1887   gsl_vector_const_view xHiDHixQixHiy_g =
1888       gsl_matrix_const_column(xHiDHixQixHiy_all_g, v);
1889   gsl_vector_const_view xHiDHixQixHiy_e =
1890       gsl_matrix_const_column(xHiDHixQixHiy_all_e, v);
1891 
1892   gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
1893   yPDPy_g += d;
1894   gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
1895   yPDPy_e += d;
1896 
1897   return;
1898 }
1899 
1900 // calculate yPDPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y
1901 //           yPDPDPy = yHiDHiDHiy
1902 //                     - (yHix)Qi(xHiDHiDHiy)-(yHiDHiDHix)Qi(xHiy)
1903 //                     - (yHiDHix)Qi(xHiDHiy)
1904 //                     + (yHix)Qi(xHiDHix)Qi(xHiDHiy)
1905 //                     + (yHiDHix)Qi(xHiDHix)Qi(xHiy)
1906 //                     + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
1907 //                     - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
Calc_yPDPDPy(const gsl_vector * eval,const gsl_matrix * Hi,const gsl_matrix * xHi,const gsl_matrix * Hiy,const gsl_vector * QixHiy,const gsl_matrix * xHiDHiy_all_g,const gsl_matrix * xHiDHiy_all_e,const gsl_matrix * QixHiDHiy_all_g,const gsl_matrix * QixHiDHiy_all_e,const gsl_matrix * xHiDHixQixHiy_all_g,const gsl_matrix * xHiDHixQixHiy_all_e,const gsl_matrix * QixHiDHixQixHiy_all_g,const gsl_matrix * QixHiDHixQixHiy_all_e,const gsl_matrix * xHiDHiDHiy_all_gg,const gsl_matrix * xHiDHiDHiy_all_ee,const gsl_matrix * xHiDHiDHiy_all_ge,const gsl_matrix * xHiDHiDHix_all_gg,const gsl_matrix * xHiDHiDHix_all_ee,const gsl_matrix * xHiDHiDHix_all_ge,const size_t i1,const size_t j1,const size_t i2,const size_t j2,double & yPDPDPy_gg,double & yPDPDPy_ee,double & yPDPDPy_ge)1908 void Calc_yPDPDPy(
1909     const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi,
1910     const gsl_matrix *Hiy, const gsl_vector *QixHiy,
1911     const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e,
1912     const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e,
1913     const gsl_matrix *xHiDHixQixHiy_all_g,
1914     const gsl_matrix *xHiDHixQixHiy_all_e,
1915     const gsl_matrix *QixHiDHixQixHiy_all_g,
1916     const gsl_matrix *QixHiDHixQixHiy_all_e,
1917     const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee,
1918     const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg,
1919     const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge,
1920     const size_t i1, const size_t j1, const size_t i2, const size_t j2,
1921     double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) {
1922   size_t d_size = Hi->size1, dc_size = xHi->size1;
1923   size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
1924   size_t v_size = d_size * (d_size + 1) / 2;
1925 
1926   double d;
1927 
1928   gsl_vector *xHiDHiDHixQixHiy = gsl_vector_alloc(dc_size);
1929 
1930   // First part: yHiDHiDHiy.
1931   Calc_yHiDHiDHiy(eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee,
1932                   yPDPDPy_ge);
1933 
1934   // Second and third parts:
1935   // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
1936   gsl_vector_const_view xHiDHiDHiy_gg1 =
1937       gsl_matrix_const_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
1938   gsl_vector_const_view xHiDHiDHiy_ee1 =
1939       gsl_matrix_const_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
1940   gsl_vector_const_view xHiDHiDHiy_ge1 =
1941       gsl_matrix_const_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
1942 
1943   gsl_vector_const_view xHiDHiDHiy_gg2 =
1944       gsl_matrix_const_column(xHiDHiDHiy_all_gg, v2 * v_size + v1);
1945   gsl_vector_const_view xHiDHiDHiy_ee2 =
1946       gsl_matrix_const_column(xHiDHiDHiy_all_ee, v2 * v_size + v1);
1947   gsl_vector_const_view xHiDHiDHiy_ge2 =
1948       gsl_matrix_const_column(xHiDHiDHiy_all_ge, v2 * v_size + v1);
1949 
1950   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
1951   yPDPDPy_gg -= d;
1952   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
1953   yPDPDPy_ee -= d;
1954   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
1955   yPDPDPy_ge -= d;
1956 
1957   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
1958   yPDPDPy_gg -= d;
1959   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
1960   yPDPDPy_ee -= d;
1961   gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
1962   yPDPDPy_ge -= d;
1963 
1964   // Fourth part: - (yHiDHix)Qi(xHiDHiy).
1965   gsl_vector_const_view xHiDHiy_g1 = gsl_matrix_const_column(xHiDHiy_all_g, v1);
1966   gsl_vector_const_view xHiDHiy_e1 = gsl_matrix_const_column(xHiDHiy_all_e, v1);
1967   gsl_vector_const_view QixHiDHiy_g2 =
1968       gsl_matrix_const_column(QixHiDHiy_all_g, v2);
1969   gsl_vector_const_view QixHiDHiy_e2 =
1970       gsl_matrix_const_column(QixHiDHiy_all_e, v2);
1971 
1972   gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
1973   yPDPDPy_gg -= d;
1974   gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
1975   yPDPDPy_ee -= d;
1976   gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
1977   yPDPDPy_ge -= d;
1978 
1979   // Fifth and sixth parts:
1980   //   + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
1981   //   (yHiDHix)Qi(xHiDHix)Qi(xHiy)
1982   gsl_vector_const_view QixHiDHiy_g1 =
1983       gsl_matrix_const_column(QixHiDHiy_all_g, v1);
1984   gsl_vector_const_view QixHiDHiy_e1 =
1985       gsl_matrix_const_column(QixHiDHiy_all_e, v1);
1986 
1987   gsl_vector_const_view xHiDHixQixHiy_g1 =
1988       gsl_matrix_const_column(xHiDHixQixHiy_all_g, v1);
1989   gsl_vector_const_view xHiDHixQixHiy_e1 =
1990       gsl_matrix_const_column(xHiDHixQixHiy_all_e, v1);
1991   gsl_vector_const_view xHiDHixQixHiy_g2 =
1992       gsl_matrix_const_column(xHiDHixQixHiy_all_g, v2);
1993   gsl_vector_const_view xHiDHixQixHiy_e2 =
1994       gsl_matrix_const_column(xHiDHixQixHiy_all_e, v2);
1995 
1996   gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
1997   yPDPDPy_gg += d;
1998   gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
1999   yPDPDPy_gg += d;
2000 
2001   gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
2002   yPDPDPy_ee += d;
2003   gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
2004   yPDPDPy_ee += d;
2005 
2006   gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
2007   yPDPDPy_ge += d;
2008   gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
2009   yPDPDPy_ge += d;
2010 
2011   // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
2012   gsl_matrix_const_view xHiDHiDHix_gg = gsl_matrix_const_submatrix(
2013       xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2014   gsl_matrix_const_view xHiDHiDHix_ee = gsl_matrix_const_submatrix(
2015       xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2016   gsl_matrix_const_view xHiDHiDHix_ge = gsl_matrix_const_submatrix(
2017       xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2018 
2019   gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0,
2020                  xHiDHiDHixQixHiy);
2021   gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
2022   yPDPDPy_gg += d;
2023   gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0,
2024                  xHiDHiDHixQixHiy);
2025   gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
2026   yPDPDPy_ee += d;
2027   gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0,
2028                  xHiDHiDHixQixHiy);
2029   gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
2030   yPDPDPy_ge += d;
2031 
2032   // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
2033   gsl_vector_const_view QixHiDHixQixHiy_g1 =
2034       gsl_matrix_const_column(QixHiDHixQixHiy_all_g, v1);
2035   gsl_vector_const_view QixHiDHixQixHiy_e1 =
2036       gsl_matrix_const_column(QixHiDHixQixHiy_all_e, v1);
2037 
2038   gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d);
2039   yPDPDPy_gg -= d;
2040   gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d);
2041   yPDPDPy_ee -= d;
2042   gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d);
2043   yPDPDPy_ge -= d;
2044 
2045   // Free memory.
2046   gsl_vector_free(xHiDHiDHixQixHiy);
2047 
2048   return;
2049 }
2050 
2051 // Calculate Edgeworth correctation factors for small samples notation
2052 // and method follows Thomas J. Rothenberg, Econometirca 1984; 52 (4)
2053 // M=xHiDHix
CalcCRT(const gsl_matrix * Hessian_inv,const gsl_matrix * Qi,const gsl_matrix * QixHiDHix_all_g,const gsl_matrix * QixHiDHix_all_e,const gsl_matrix * xHiDHiDHix_all_gg,const gsl_matrix * xHiDHiDHix_all_ee,const gsl_matrix * xHiDHiDHix_all_ge,const size_t d_size,double & crt_a,double & crt_b,double & crt_c)2054 void CalcCRT(const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
2055              const gsl_matrix *QixHiDHix_all_g,
2056              const gsl_matrix *QixHiDHix_all_e,
2057              const gsl_matrix *xHiDHiDHix_all_gg,
2058              const gsl_matrix *xHiDHiDHix_all_ee,
2059              const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size,
2060              double &crt_a, double &crt_b, double &crt_c) {
2061   crt_a = 0.0;
2062   crt_b = 0.0;
2063   crt_c = 0.0;
2064 
2065   size_t dc_size = Qi->size1, v_size = Hessian_inv->size1 / 2;
2066   size_t c_size = dc_size / d_size;
2067   double h_gg, h_ge, h_ee, d, B = 0.0, C = 0.0, D = 0.0;
2068   double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
2069   double trCC_gg, trCC_ge, trCC_ee, trD_gg = 0.0, trD_ge = 0.0, trD_ee = 0.0;
2070 
2071   gsl_matrix *QiMQi_g1 = gsl_matrix_alloc(dc_size, dc_size);
2072   gsl_matrix *QiMQi_e1 = gsl_matrix_alloc(dc_size, dc_size);
2073   gsl_matrix *QiMQi_g2 = gsl_matrix_alloc(dc_size, dc_size);
2074   gsl_matrix *QiMQi_e2 = gsl_matrix_alloc(dc_size, dc_size);
2075 
2076   gsl_matrix *QiMQisQisi_g1 = gsl_matrix_alloc(d_size, d_size);
2077   gsl_matrix *QiMQisQisi_e1 = gsl_matrix_alloc(d_size, d_size);
2078   gsl_matrix *QiMQisQisi_g2 = gsl_matrix_alloc(d_size, d_size);
2079   gsl_matrix *QiMQisQisi_e2 = gsl_matrix_alloc(d_size, d_size);
2080 
2081   gsl_matrix *QiMQiMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
2082   gsl_matrix *QiMQiMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
2083   gsl_matrix *QiMQiMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
2084 
2085   gsl_matrix *QiMMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
2086   gsl_matrix *QiMMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
2087   gsl_matrix *QiMMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
2088 
2089   gsl_matrix *Qi_si = gsl_matrix_alloc(d_size, d_size);
2090 
2091   gsl_matrix *M_dd = gsl_matrix_alloc(d_size, d_size);
2092   gsl_matrix *M_dcdc = gsl_matrix_alloc(dc_size, dc_size);
2093 
2094   // Invert Qi_sub to Qi_si.
2095   gsl_matrix *Qi_sub = gsl_matrix_alloc(d_size, d_size);
2096 
2097   gsl_matrix_const_view Qi_s = gsl_matrix_const_submatrix(
2098       Qi, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
2099 
2100   int sig;
2101   gsl_permutation *pmt = gsl_permutation_alloc(d_size);
2102 
2103   gsl_matrix_memcpy(Qi_sub, &Qi_s.matrix);
2104   LUDecomp(Qi_sub, pmt, &sig);
2105   LUInvert(Qi_sub, pmt, Qi_si);
2106 
2107   gsl_permutation_free(pmt);
2108   gsl_matrix_free(Qi_sub);
2109 
2110   // Calculate correction factors.
2111   for (size_t v1 = 0; v1 < v_size; v1++) {
2112 
2113     // Calculate Qi(xHiDHix)Qi, and subpart of it.
2114     gsl_matrix_const_view QiM_g1 = gsl_matrix_const_submatrix(
2115         QixHiDHix_all_g, 0, v1 * dc_size, dc_size, dc_size);
2116     gsl_matrix_const_view QiM_e1 = gsl_matrix_const_submatrix(
2117         QixHiDHix_all_e, 0, v1 * dc_size, dc_size, dc_size);
2118 
2119     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0,
2120                    QiMQi_g1);
2121     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0,
2122                    QiMQi_e1);
2123 
2124     gsl_matrix_view QiMQi_g1_s = gsl_matrix_submatrix(
2125         QiMQi_g1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
2126     gsl_matrix_view QiMQi_e1_s = gsl_matrix_submatrix(
2127         QiMQi_e1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
2128 
2129     // Calculate trCg1 and trCe1.
2130     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, Qi_si,
2131                    0.0, QiMQisQisi_g1);
2132     trCg1 = 0.0;
2133     for (size_t k = 0; k < d_size; k++) {
2134       trCg1 -= gsl_matrix_get(QiMQisQisi_g1, k, k);
2135     }
2136 
2137     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si,
2138                    0.0, QiMQisQisi_e1);
2139     trCe1 = 0.0;
2140     for (size_t k = 0; k < d_size; k++) {
2141       trCe1 -= gsl_matrix_get(QiMQisQisi_e1, k, k);
2142     }
2143 
2144     for (size_t v2 = 0; v2 < v_size; v2++) {
2145       if (v2 < v1) {
2146         continue;
2147       }
2148 
2149       // Calculate Qi(xHiDHix)Qi, and subpart of it.
2150       gsl_matrix_const_view QiM_g2 = gsl_matrix_const_submatrix(
2151           QixHiDHix_all_g, 0, v2 * dc_size, dc_size, dc_size);
2152       gsl_matrix_const_view QiM_e2 = gsl_matrix_const_submatrix(
2153           QixHiDHix_all_e, 0, v2 * dc_size, dc_size, dc_size);
2154 
2155       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0,
2156                      QiMQi_g2);
2157       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0,
2158                      QiMQi_e2);
2159 
2160       gsl_matrix_view QiMQi_g2_s =
2161           gsl_matrix_submatrix(QiMQi_g2, (c_size - 1) * d_size,
2162                                (c_size - 1) * d_size, d_size, d_size);
2163       gsl_matrix_view QiMQi_e2_s =
2164           gsl_matrix_submatrix(QiMQi_e2, (c_size - 1) * d_size,
2165                                (c_size - 1) * d_size, d_size, d_size);
2166 
2167       // Calculate trCg2 and trCe2.
2168       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si,
2169                      0.0, QiMQisQisi_g2);
2170       trCg2 = 0.0;
2171       for (size_t k = 0; k < d_size; k++) {
2172         trCg2 -= gsl_matrix_get(QiMQisQisi_g2, k, k);
2173       }
2174 
2175       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si,
2176                      0.0, QiMQisQisi_e2);
2177       trCe2 = 0.0;
2178       for (size_t k = 0; k < d_size; k++) {
2179         trCe2 -= gsl_matrix_get(QiMQisQisi_e2, k, k);
2180       }
2181 
2182       // Calculate trCC_gg, trCC_ge, trCC_ee.
2183       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
2184                      QiMQisQisi_g2, 0.0, M_dd);
2185       trCC_gg = 0.0;
2186       for (size_t k = 0; k < d_size; k++) {
2187         trCC_gg += gsl_matrix_get(M_dd, k, k);
2188       }
2189 
2190       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
2191                      QiMQisQisi_e2, 0.0, M_dd);
2192       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
2193                      QiMQisQisi_g2, 1.0, M_dd);
2194       trCC_ge = 0.0;
2195       for (size_t k = 0; k < d_size; k++) {
2196         trCC_ge += gsl_matrix_get(M_dd, k, k);
2197       }
2198 
2199       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
2200                      QiMQisQisi_e2, 0.0, M_dd);
2201       trCC_ee = 0.0;
2202       for (size_t k = 0; k < d_size; k++) {
2203         trCC_ee += gsl_matrix_get(M_dd, k, k);
2204       }
2205 
2206       // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
2207       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2,
2208                      0.0, QiMQiMQi_gg);
2209       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2,
2210                      0.0, QiMQiMQi_ge);
2211       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2,
2212                      1.0, QiMQiMQi_ge);
2213       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2,
2214                      0.0, QiMQiMQi_ee);
2215 
2216       gsl_matrix_view QiMQiMQi_gg_s =
2217           gsl_matrix_submatrix(QiMQiMQi_gg, (c_size - 1) * d_size,
2218                                (c_size - 1) * d_size, d_size, d_size);
2219       gsl_matrix_view QiMQiMQi_ge_s =
2220           gsl_matrix_submatrix(QiMQiMQi_ge, (c_size - 1) * d_size,
2221                                (c_size - 1) * d_size, d_size, d_size);
2222       gsl_matrix_view QiMQiMQi_ee_s =
2223           gsl_matrix_submatrix(QiMQiMQi_ee, (c_size - 1) * d_size,
2224                                (c_size - 1) * d_size, d_size, d_size);
2225 
2226       // and part of trB_gg, trB_ge, trB_ee.
2227       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix,
2228                      Qi_si, 0.0, M_dd);
2229       trB_gg = 0.0;
2230       for (size_t k = 0; k < d_size; k++) {
2231         d = gsl_matrix_get(M_dd, k, k);
2232         trB_gg -= d;
2233       }
2234 
2235       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix,
2236                      Qi_si, 0.0, M_dd);
2237       trB_ge = 0.0;
2238       for (size_t k = 0; k < d_size; k++) {
2239         d = gsl_matrix_get(M_dd, k, k);
2240         trB_ge -= d;
2241       }
2242 
2243       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix,
2244                      Qi_si, 0.0, M_dd);
2245       trB_ee = 0.0;
2246       for (size_t k = 0; k < d_size; k++) {
2247         d = gsl_matrix_get(M_dd, k, k);
2248         trB_ee -= d;
2249       }
2250 
2251       // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
2252       gsl_matrix_const_view MM_gg = gsl_matrix_const_submatrix(
2253           xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2254       gsl_matrix_const_view MM_ge = gsl_matrix_const_submatrix(
2255           xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2256       gsl_matrix_const_view MM_ee = gsl_matrix_const_submatrix(
2257           xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
2258 
2259       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0,
2260                      M_dcdc);
2261       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
2262                      QiMMQi_gg);
2263       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0,
2264                      M_dcdc);
2265       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
2266                      QiMMQi_ge);
2267       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0,
2268                      M_dcdc);
2269       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
2270                      QiMMQi_ee);
2271 
2272       gsl_matrix_view QiMMQi_gg_s =
2273           gsl_matrix_submatrix(QiMMQi_gg, (c_size - 1) * d_size,
2274                                (c_size - 1) * d_size, d_size, d_size);
2275       gsl_matrix_view QiMMQi_ge_s =
2276           gsl_matrix_submatrix(QiMMQi_ge, (c_size - 1) * d_size,
2277                                (c_size - 1) * d_size, d_size, d_size);
2278       gsl_matrix_view QiMMQi_ee_s =
2279           gsl_matrix_submatrix(QiMMQi_ee, (c_size - 1) * d_size,
2280                                (c_size - 1) * d_size, d_size, d_size);
2281 
2282       // Calculate the other part of trB_gg, trB_ge, trB_ee.
2283       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix,
2284                      Qi_si, 0.0, M_dd);
2285       for (size_t k = 0; k < d_size; k++) {
2286         trB_gg += gsl_matrix_get(M_dd, k, k);
2287       }
2288       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ge_s.matrix,
2289                      Qi_si, 0.0, M_dd);
2290       for (size_t k = 0; k < d_size; k++) {
2291         trB_ge += 2.0 * gsl_matrix_get(M_dd, k, k);
2292       }
2293       gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ee_s.matrix,
2294                      Qi_si, 0.0, M_dd);
2295       for (size_t k = 0; k < d_size; k++) {
2296         trB_ee += gsl_matrix_get(M_dd, k, k);
2297       }
2298 
2299       // Calculate trD_gg, trD_ge, trD_ee.
2300       trD_gg = 2.0 * trB_gg;
2301       trD_ge = 2.0 * trB_ge;
2302       trD_ee = 2.0 * trB_ee;
2303 
2304       // calculate B, C and D
2305       h_gg = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2);
2306       h_ge = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2 + v_size);
2307       h_ee = -1.0 * gsl_matrix_get(Hessian_inv, v1 + v_size, v2 + v_size);
2308 
2309       B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
2310       C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
2311            h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
2312            h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
2313       D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
2314            h_ee * (trCC_ee + 0.5 * trD_ee);
2315 
2316       if (v1 != v2) {
2317         B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
2318         C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
2319              h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
2320              h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
2321         D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
2322              h_ee * (trCC_ee + 0.5 * trD_ee);
2323       }
2324     }
2325   }
2326 
2327   // Calculate a, b, c from B C D.
2328   crt_a = 2.0 * D - C;
2329   crt_b = 2.0 * B;
2330   crt_c = C;
2331 
2332   // Free matrix memory.
2333   gsl_matrix_free(QiMQi_g1);
2334   gsl_matrix_free(QiMQi_e1);
2335   gsl_matrix_free(QiMQi_g2);
2336   gsl_matrix_free(QiMQi_e2);
2337 
2338   gsl_matrix_free(QiMQisQisi_g1);
2339   gsl_matrix_free(QiMQisQisi_e1);
2340   gsl_matrix_free(QiMQisQisi_g2);
2341   gsl_matrix_free(QiMQisQisi_e2);
2342 
2343   gsl_matrix_free(QiMQiMQi_gg);
2344   gsl_matrix_free(QiMQiMQi_ge);
2345   gsl_matrix_free(QiMQiMQi_ee);
2346 
2347   gsl_matrix_free(QiMMQi_gg);
2348   gsl_matrix_free(QiMMQi_ge);
2349   gsl_matrix_free(QiMMQi_ee);
2350 
2351   gsl_matrix_free(Qi_si);
2352 
2353   gsl_matrix_free(M_dd);
2354   gsl_matrix_free(M_dcdc);
2355 
2356   return;
2357 }
2358 
2359 // Calculate first-order and second-order derivatives.
CalcDev(const char func_name,const gsl_vector * eval,const gsl_matrix * Qi,const gsl_matrix * Hi,const gsl_matrix * xHi,const gsl_matrix * Hiy,const gsl_vector * QixHiy,gsl_vector * gradient,gsl_matrix * Hessian_inv,double & crt_a,double & crt_b,double & crt_c)2360 void CalcDev(const char func_name, const gsl_vector *eval, const gsl_matrix *Qi,
2361              const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy,
2362              const gsl_vector *QixHiy, gsl_vector *gradient,
2363              gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
2364              double &crt_c) {
2365   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
2366       func_name != 'l') {
2367     cout << "func_name only takes 'R' or 'L': 'R' for "
2368          << "log-restricted likelihood, 'L' for log-likelihood." << endl;
2369     return;
2370   }
2371 
2372   size_t dc_size = Qi->size1, d_size = Hi->size1;
2373   size_t c_size = dc_size / d_size;
2374   size_t v_size = d_size * (d_size + 1) / 2;
2375   size_t v1, v2;
2376   double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
2377 
2378   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
2379 
2380   gsl_matrix *xHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
2381   gsl_matrix *xHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
2382   gsl_matrix *xHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
2383   gsl_matrix *xHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
2384   gsl_matrix *xHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
2385   gsl_matrix *xHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
2386 
2387   gsl_matrix *QixHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
2388   gsl_matrix *QixHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
2389   gsl_matrix *QixHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
2390   gsl_matrix *QixHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
2391   gsl_matrix *QixHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
2392   gsl_matrix *QixHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
2393 
2394   gsl_matrix *xHiDHiDHiy_all_gg = gsl_matrix_alloc(dc_size, v_size * v_size);
2395   gsl_matrix *xHiDHiDHiy_all_ee = gsl_matrix_alloc(dc_size, v_size * v_size);
2396   gsl_matrix *xHiDHiDHiy_all_ge = gsl_matrix_alloc(dc_size, v_size * v_size);
2397   gsl_matrix *xHiDHiDHix_all_gg =
2398       gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
2399   gsl_matrix *xHiDHiDHix_all_ee =
2400       gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
2401   gsl_matrix *xHiDHiDHix_all_ge =
2402       gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
2403 
2404   // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
2405   Calc_xHiDHiy_all(eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
2406   Calc_xHiDHix_all(eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
2407   Calc_xHiDHixQixHiy_all(xHiDHix_all_g, xHiDHix_all_e, QixHiy,
2408                          xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
2409 
2410   Calc_xHiDHiDHiy_all(v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
2411                       xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
2412   Calc_xHiDHiDHix_all(v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
2413                       xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
2414 
2415   // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
2416   Calc_QiVec_all(Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
2417                  QixHiDHiy_all_e);
2418   Calc_QiVec_all(Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
2419                  QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
2420   Calc_QiMat_all(Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
2421                  QixHiDHix_all_e);
2422 
2423   double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
2424   double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
2425   double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
2426 
2427   // Calculate gradient and Hessian for Vg.
2428   for (size_t i1 = 0; i1 < d_size; i1++) {
2429     for (size_t j1 = 0; j1 < d_size; j1++) {
2430       if (j1 < i1) {
2431         continue;
2432       }
2433       v1 = GetIndex(i1, j1, d_size);
2434 
2435       Calc_yPDPy(eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
2436                  xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g,
2437                  yPDPy_e);
2438 
2439       if (func_name == 'R' || func_name == 'r') {
2440         Calc_tracePD(eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g,
2441                      tPD_e);
2442 
2443         dev1_g = -0.5 * tPD_g + 0.5 * yPDPy_g;
2444         dev1_e = -0.5 * tPD_e + 0.5 * yPDPy_e;
2445       } else {
2446         Calc_traceHiD(eval, Hi, i1, j1, tHiD_g, tHiD_e);
2447 
2448         dev1_g = -0.5 * tHiD_g + 0.5 * yPDPy_g;
2449         dev1_e = -0.5 * tHiD_e + 0.5 * yPDPy_e;
2450       }
2451 
2452       gsl_vector_set(gradient, v1, dev1_g);
2453       gsl_vector_set(gradient, v1 + v_size, dev1_e);
2454 
2455       for (size_t i2 = 0; i2 < d_size; i2++) {
2456         for (size_t j2 = 0; j2 < d_size; j2++) {
2457           if (j2 < i2) {
2458             continue;
2459           }
2460           v2 = GetIndex(i2, j2, d_size);
2461 
2462           if (v2 < v1) {
2463             continue;
2464           }
2465 
2466           Calc_yPDPDPy(eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
2467                        QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g,
2468                        xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g,
2469                        QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg,
2470                        xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
2471                        xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2,
2472                        yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
2473 
2474           // AI for REML.
2475           if (func_name == 'R' || func_name == 'r') {
2476             Calc_tracePDPD(eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e,
2477                            xHiDHiDHix_all_gg, xHiDHiDHix_all_ee,
2478                            xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg,
2479                            tPDPD_ee, tPDPD_ge);
2480 
2481             dev2_gg = 0.5 * tPDPD_gg - yPDPDPy_gg;
2482             dev2_ee = 0.5 * tPDPD_ee - yPDPDPy_ee;
2483             dev2_ge = 0.5 * tPDPD_ge - yPDPDPy_ge;
2484           } else {
2485             Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee,
2486                              tHiDHiD_ge);
2487 
2488             dev2_gg = 0.5 * tHiDHiD_gg - yPDPDPy_gg;
2489             dev2_ee = 0.5 * tHiDHiD_ee - yPDPDPy_ee;
2490             dev2_ge = 0.5 * tHiDHiD_ge - yPDPDPy_ge;
2491           }
2492 
2493           // Set up Hessian.
2494           gsl_matrix_set(Hessian, v1, v2, dev2_gg);
2495           gsl_matrix_set(Hessian, v1 + v_size, v2 + v_size, dev2_ee);
2496           gsl_matrix_set(Hessian, v1, v2 + v_size, dev2_ge);
2497           gsl_matrix_set(Hessian, v2 + v_size, v1, dev2_ge);
2498 
2499           if (v1 != v2) {
2500             gsl_matrix_set(Hessian, v2, v1, dev2_gg);
2501             gsl_matrix_set(Hessian, v2 + v_size, v1 + v_size, dev2_ee);
2502             gsl_matrix_set(Hessian, v2, v1 + v_size, dev2_ge);
2503             gsl_matrix_set(Hessian, v1 + v_size, v2, dev2_ge);
2504           }
2505         }
2506       }
2507     }
2508   }
2509 
2510   // Invert Hessian.
2511   int sig;
2512   gsl_permutation *pmt = gsl_permutation_alloc(v_size * 2);
2513 
2514   LUDecomp(Hessian, pmt, &sig);
2515   LUInvert(Hessian, pmt, Hessian_inv);
2516 
2517   gsl_permutation_free(pmt);
2518   gsl_matrix_free(Hessian);
2519 
2520   // Calculate Edgeworth correction factors after inverting
2521   // Hessian.
2522   if (c_size > 1) {
2523     CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
2524             xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size,
2525             crt_a, crt_b, crt_c);
2526   } else {
2527     crt_a = 0.0;
2528     crt_b = 0.0;
2529     crt_c = 0.0;
2530   }
2531 
2532   gsl_matrix_free(xHiDHiy_all_g);
2533   gsl_matrix_free(xHiDHiy_all_e);
2534   gsl_matrix_free(xHiDHix_all_g);
2535   gsl_matrix_free(xHiDHix_all_e);
2536   gsl_matrix_free(xHiDHixQixHiy_all_g);
2537   gsl_matrix_free(xHiDHixQixHiy_all_e);
2538 
2539   gsl_matrix_free(QixHiDHiy_all_g);
2540   gsl_matrix_free(QixHiDHiy_all_e);
2541   gsl_matrix_free(QixHiDHix_all_g);
2542   gsl_matrix_free(QixHiDHix_all_e);
2543   gsl_matrix_free(QixHiDHixQixHiy_all_g);
2544   gsl_matrix_free(QixHiDHixQixHiy_all_e);
2545 
2546   gsl_matrix_free(xHiDHiDHiy_all_gg);
2547   gsl_matrix_free(xHiDHiDHiy_all_ee);
2548   gsl_matrix_free(xHiDHiDHiy_all_ge);
2549   gsl_matrix_free(xHiDHiDHix_all_gg);
2550   gsl_matrix_free(xHiDHiDHix_all_ee);
2551   gsl_matrix_free(xHiDHiDHix_all_ge);
2552 
2553   return;
2554 }
2555 
2556 // Update Vg, Ve.
UpdateVgVe(const gsl_matrix * Hessian_inv,const gsl_vector * gradient,const double step_scale,gsl_matrix * V_g,gsl_matrix * V_e)2557 void UpdateVgVe(const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
2558                 const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
2559   size_t v_size = gradient->size / 2, d_size = V_g->size1;
2560   size_t v;
2561 
2562   gsl_vector *vec_v = gsl_vector_alloc(v_size * 2);
2563 
2564   double d;
2565 
2566   // Vectorize Vg and Ve.
2567   for (size_t i = 0; i < d_size; i++) {
2568     for (size_t j = 0; j < d_size; j++) {
2569       if (j < i) {
2570         continue;
2571       }
2572       v = GetIndex(i, j, d_size);
2573 
2574       d = gsl_matrix_get(V_g, i, j);
2575       gsl_vector_set(vec_v, v, d);
2576 
2577       d = gsl_matrix_get(V_e, i, j);
2578       gsl_vector_set(vec_v, v + v_size, d);
2579     }
2580   }
2581 
2582   gsl_blas_dgemv(CblasNoTrans, -1.0 * step_scale, Hessian_inv, gradient, 1.0,
2583                  vec_v);
2584 
2585   // Save Vg and Ve.
2586   for (size_t i = 0; i < d_size; i++) {
2587     for (size_t j = 0; j < d_size; j++) {
2588       if (j < i) {
2589         continue;
2590       }
2591       v = GetIndex(i, j, d_size);
2592 
2593       d = gsl_vector_get(vec_v, v);
2594       gsl_matrix_set(V_g, i, j, d);
2595       gsl_matrix_set(V_g, j, i, d);
2596 
2597       d = gsl_vector_get(vec_v, v + v_size);
2598       gsl_matrix_set(V_e, i, j, d);
2599       gsl_matrix_set(V_e, j, i, d);
2600     }
2601   }
2602 
2603   gsl_vector_free(vec_v);
2604 
2605   return;
2606 }
2607 
MphNR(const char func_name,const size_t max_iter,const double max_prec,const gsl_vector * eval,const gsl_matrix * X,const gsl_matrix * Y,gsl_matrix * Hi_all,gsl_matrix * xHi_all,gsl_matrix * Hiy_all,gsl_matrix * V_g,gsl_matrix * V_e,gsl_matrix * Hessian_inv,double & crt_a,double & crt_b,double & crt_c)2608 double MphNR(const char func_name, const size_t max_iter, const double max_prec,
2609              const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
2610              gsl_matrix *Hi_all, gsl_matrix *xHi_all, gsl_matrix *Hiy_all,
2611              gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *Hessian_inv,
2612              double &crt_a, double &crt_b, double &crt_c) {
2613   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
2614       func_name != 'l') {
2615     cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
2616          << "likelihood, 'L' for log-likelihood." << endl;
2617     return 0.0;
2618   }
2619   size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
2620   size_t dc_size = d_size * c_size;
2621   size_t v_size = d_size * (d_size + 1) / 2;
2622 
2623   double logdet_H, logdet_Q, yPy, logl_const;
2624   double logl_old = 0.0, logl_new = 0.0, step_scale;
2625   int sig;
2626   size_t step_iter, flag_pd;
2627 
2628   gsl_matrix *Vg_save = gsl_matrix_alloc(d_size, d_size);
2629   gsl_matrix *Ve_save = gsl_matrix_alloc(d_size, d_size);
2630   gsl_matrix *V_temp = gsl_matrix_alloc(d_size, d_size);
2631   gsl_matrix *U_temp = gsl_matrix_alloc(d_size, d_size);
2632   gsl_vector *D_temp = gsl_vector_alloc(d_size);
2633   gsl_vector *xHiy = gsl_vector_alloc(dc_size);
2634   gsl_vector *QixHiy = gsl_vector_alloc(dc_size);
2635   gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
2636   gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
2637 
2638   gsl_vector *gradient = gsl_vector_alloc(v_size * 2);
2639 
2640   // Calculate |XXt| and (XXt)^{-1}.
2641   gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
2642   for (size_t i = 0; i < c_size; ++i) {
2643     for (size_t j = 0; j < i; ++j) {
2644       gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
2645     }
2646   }
2647 
2648   gsl_permutation *pmt = gsl_permutation_alloc(c_size);
2649   LUDecomp(XXt, pmt, &sig);
2650   gsl_permutation_free(pmt);
2651 
2652   // Calculate the constant for logl.
2653   if (func_name == 'R' || func_name == 'r') {
2654     logl_const =
2655         -0.5 * (double)(n_size - c_size) * (double)d_size * safe_log(2.0 * M_PI) +
2656         0.5 * (double)d_size * LULndet(XXt);
2657   } else {
2658     logl_const = -0.5 * (double)n_size * (double)d_size * safe_log(2.0 * M_PI);
2659   }
2660 
2661   // Optimization iterations.
2662   for (size_t t = 0; t < max_iter; t++) {
2663     gsl_matrix_memcpy(Vg_save, V_g);
2664     gsl_matrix_memcpy(Ve_save, V_e);
2665 
2666     step_scale = 1.0;
2667     step_iter = 0;
2668     do {
2669       gsl_matrix_memcpy(V_g, Vg_save);
2670       gsl_matrix_memcpy(V_e, Ve_save);
2671 
2672       // Update Vg, Ve, and invert Hessian.
2673       if (t != 0) {
2674         UpdateVgVe(Hessian_inv, gradient, step_scale, V_g, V_e);
2675       }
2676 
2677       // Check if both Vg and Ve are positive definite.
2678       flag_pd = 1;
2679       gsl_matrix_memcpy(V_temp, V_e);
2680       EigenDecomp(V_temp, U_temp, D_temp, 0);
2681       for (size_t i = 0; i < d_size; i++) {
2682         if (gsl_vector_get(D_temp, i) <= 0) {
2683           flag_pd = 0;
2684         }
2685       }
2686       gsl_matrix_memcpy(V_temp, V_g);
2687       EigenDecomp(V_temp, U_temp, D_temp, 0);
2688       for (size_t i = 0; i < d_size; i++) {
2689         if (gsl_vector_get(D_temp, i) <= 0) {
2690           flag_pd = 0;
2691         }
2692       }
2693 
2694       // If flag_pd==1, continue to calculate quantities
2695       // and logl.
2696       if (flag_pd == 1) {
2697         CalcHiQi(eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q);
2698         Calc_Hiy_all(Y, Hi_all, Hiy_all);
2699         Calc_xHi_all(X, Hi_all, xHi_all);
2700 
2701         // Calculate QixHiy and yPy.
2702         Calc_xHiy(Y, xHi_all, xHiy);
2703         gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
2704 
2705         gsl_blas_ddot(QixHiy, xHiy, &yPy);
2706         yPy = Calc_yHiy(Y, Hiy_all) - yPy;
2707 
2708         // Calculate log likelihood/restricted likelihood value.
2709         if (func_name == 'R' || func_name == 'r') {
2710           logl_new = logl_const - 0.5 * logdet_H - 0.5 * logdet_Q - 0.5 * yPy;
2711         } else {
2712           logl_new = logl_const - 0.5 * logdet_H - 0.5 * yPy;
2713         }
2714       }
2715 
2716       step_scale /= 2.0;
2717       step_iter++;
2718 
2719     } while (
2720         (flag_pd == 0 || logl_new < logl_old || logl_new - logl_old > 10) &&
2721         step_iter < 10 && t != 0);
2722 
2723     // Terminate if change is small.
2724     if (t != 0) {
2725       if (logl_new < logl_old || flag_pd == 0) {
2726         gsl_matrix_memcpy(V_g, Vg_save);
2727         gsl_matrix_memcpy(V_e, Ve_save);
2728         break;
2729       }
2730 
2731       if (logl_new - logl_old < max_prec) {
2732         break;
2733       }
2734     }
2735 
2736     logl_old = logl_new;
2737 
2738     CalcDev(func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient,
2739             Hessian_inv, crt_a, crt_b, crt_c);
2740   }
2741 
2742   // Mutiply Hessian_inv with -1.0.
2743   // Now Hessian_inv is the variance matrix.
2744   gsl_matrix_scale(Hessian_inv, -1.0);
2745 
2746   gsl_matrix_free(Vg_save);
2747   gsl_matrix_free(Ve_save);
2748   gsl_matrix_free(V_temp);
2749   gsl_matrix_free(U_temp);
2750   gsl_vector_free(D_temp);
2751   gsl_vector_free(xHiy);
2752   gsl_vector_free(QixHiy);
2753 
2754   gsl_matrix_free(Qi);
2755   gsl_matrix_free(XXt);
2756 
2757   gsl_vector_free(gradient);
2758 
2759   return logl_new;
2760 }
2761 
2762 // Initialize Vg, Ve and B.
MphInitial(const size_t em_iter,const double em_prec,const size_t nr_iter,const double nr_prec,const gsl_vector * eval,const gsl_matrix * X,const gsl_matrix * Y,const double l_min,const double l_max,const size_t n_region,gsl_matrix * V_g,gsl_matrix * V_e,gsl_matrix * B)2763 void MphInitial(const size_t em_iter, const double em_prec,
2764                 const size_t nr_iter, const double nr_prec,
2765                 const gsl_vector *eval, const gsl_matrix *X,
2766                 const gsl_matrix *Y, const double l_min, const double l_max,
2767                 const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
2768                 gsl_matrix *B) {
2769 
2770   debug_msg("MphInitial");
2771   gsl_matrix_set_zero(V_g);
2772   gsl_matrix_set_zero(V_e);
2773   gsl_matrix_set_zero(B);
2774 
2775   size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
2776   double a, b, c;
2777   double lambda, logl, vg, ve;
2778 
2779   // Initialize the diagonal elements of Vg and Ve using univariate
2780   // LMM and REML estimates.
2781   gsl_matrix *Xt = gsl_matrix_alloc(n_size, c_size);
2782   gsl_vector *beta_temp = gsl_vector_alloc(c_size);
2783   gsl_vector *se_beta_temp = gsl_vector_alloc(c_size);
2784 
2785   gsl_matrix_transpose_memcpy(Xt, X);
2786 
2787   for (size_t i = 0; i < d_size; i++) {
2788     gsl_vector_const_view Y_row = gsl_matrix_const_row(Y, i);
2789     CalcLambda('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda,
2790                logl);
2791     CalcLmmVgVeBeta(eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp,
2792                     se_beta_temp);
2793 
2794     gsl_matrix_set(V_g, i, i, vg);
2795     gsl_matrix_set(V_e, i, i, ve);
2796   }
2797 
2798   gsl_matrix_free(Xt);
2799   gsl_vector_free(beta_temp);
2800   gsl_vector_free(se_beta_temp);
2801 
2802   // If number of phenotypes is above four, then obtain the off
2803   // diagonal elements with two trait models.
2804   if (d_size > 4) {
2805 
2806     // First obtain good initial values.
2807     // Large matrices for EM.
2808     gsl_matrix *U_hat = gsl_matrix_alloc(2, n_size);
2809     gsl_matrix *E_hat = gsl_matrix_alloc(2, n_size);
2810     gsl_matrix *OmegaU = gsl_matrix_alloc(2, n_size);
2811     gsl_matrix *OmegaE = gsl_matrix_alloc(2, n_size);
2812     gsl_matrix *UltVehiY = gsl_matrix_alloc(2, n_size);
2813     gsl_matrix *UltVehiBX = gsl_matrix_alloc(2, n_size);
2814     gsl_matrix *UltVehiU = gsl_matrix_alloc(2, n_size);
2815     gsl_matrix *UltVehiE = gsl_matrix_alloc(2, n_size);
2816 
2817     // Large matrices for NR. Each dxd block is H_k^{-1}.
2818     gsl_matrix *Hi_all = gsl_matrix_alloc(2, 2 * n_size);
2819 
2820     // Each column is H_k^{-1}y_k.
2821     gsl_matrix *Hiy_all = gsl_matrix_alloc(2, n_size);
2822 
2823     // Each dcxdc block is x_k\otimes H_k^{-1}.
2824     gsl_matrix *xHi_all = gsl_matrix_alloc(2 * c_size, 2 * n_size);
2825     gsl_matrix *Hessian = gsl_matrix_alloc(6, 6);
2826 
2827     // 2 by n matrix of Y.
2828     gsl_matrix *Y_sub = gsl_matrix_alloc(2, n_size);
2829     gsl_matrix *Vg_sub = gsl_matrix_alloc(2, 2);
2830     gsl_matrix *Ve_sub = gsl_matrix_alloc(2, 2);
2831     gsl_matrix *B_sub = gsl_matrix_alloc(2, c_size);
2832 
2833     for (size_t i = 0; i < d_size; i++) {
2834       gsl_vector_view Y_sub1 = gsl_matrix_row(Y_sub, 0);
2835       gsl_vector_const_view Y_1 = gsl_matrix_const_row(Y, i);
2836       gsl_vector_memcpy(&Y_sub1.vector, &Y_1.vector);
2837 
2838       for (size_t j = i + 1; j < d_size; j++) {
2839         gsl_vector_view Y_sub2 = gsl_matrix_row(Y_sub, 1);
2840         gsl_vector_const_view Y_2 = gsl_matrix_const_row(Y, j);
2841         gsl_vector_memcpy(&Y_sub2.vector, &Y_2.vector);
2842 
2843         gsl_matrix_set_zero(Vg_sub);
2844         gsl_matrix_set_zero(Ve_sub);
2845         gsl_matrix_set(Vg_sub, 0, 0, gsl_matrix_get(V_g, i, i));
2846         gsl_matrix_set(Ve_sub, 0, 0, gsl_matrix_get(V_e, i, i));
2847         gsl_matrix_set(Vg_sub, 1, 1, gsl_matrix_get(V_g, j, j));
2848         gsl_matrix_set(Ve_sub, 1, 1, gsl_matrix_get(V_e, j, j));
2849 
2850         logl = MphEM('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat,
2851                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
2852                      Vg_sub, Ve_sub, B_sub);
2853         logl = MphNR('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all,
2854                      Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
2855 
2856         gsl_matrix_set(V_g, i, j, gsl_matrix_get(Vg_sub, 0, 1));
2857         gsl_matrix_set(V_g, j, i, gsl_matrix_get(Vg_sub, 0, 1));
2858 
2859         gsl_matrix_set(V_e, i, j, ve = gsl_matrix_get(Ve_sub, 0, 1));
2860         gsl_matrix_set(V_e, j, i, ve = gsl_matrix_get(Ve_sub, 0, 1));
2861       }
2862     }
2863 
2864     // Free matrices.
2865     gsl_matrix_free(U_hat);
2866     gsl_matrix_free(E_hat);
2867     gsl_matrix_free(OmegaU);
2868     gsl_matrix_free(OmegaE);
2869     gsl_matrix_free(UltVehiY);
2870     gsl_matrix_free(UltVehiBX);
2871     gsl_matrix_free(UltVehiU);
2872     gsl_matrix_free(UltVehiE);
2873 
2874     gsl_matrix_free(Hi_all);
2875     gsl_matrix_free(Hiy_all);
2876     gsl_matrix_free(xHi_all);
2877     gsl_matrix_free(Hessian);
2878 
2879     gsl_matrix_free(Y_sub);
2880     gsl_matrix_free(Vg_sub);
2881     gsl_matrix_free(Ve_sub);
2882     gsl_matrix_free(B_sub);
2883   }
2884 
2885   // Calculate B hat using GSL estimate.
2886   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
2887 
2888   gsl_vector *D_l = gsl_vector_alloc(d_size);
2889   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
2890   gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
2891   gsl_matrix *Qi = gsl_matrix_alloc(d_size * c_size, d_size * c_size);
2892   gsl_vector *XHiy = gsl_vector_alloc(d_size * c_size);
2893   gsl_vector *beta = gsl_vector_alloc(d_size * c_size);
2894 
2895   gsl_vector_set_zero(XHiy);
2896 
2897   double dl, d, delta, dx, dy;
2898 
2899   // Eigen decomposition and calculate log|Ve|.
2900   // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
2901   EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
2902 
2903   // Calculate Qi and log|Q|.
2904   // double logdet_Q = CalcQi(eval, D_l, X, Qi);
2905   CalcQi(eval, D_l, X, Qi);
2906 
2907   // Calculate UltVehiY.
2908   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
2909 
2910   // calculate XHiy
2911   for (size_t i = 0; i < d_size; i++) {
2912     dl = gsl_vector_get(D_l, i);
2913 
2914     for (size_t j = 0; j < c_size; j++) {
2915       d = 0.0;
2916       for (size_t k = 0; k < n_size; k++) {
2917         delta = gsl_vector_get(eval, k);
2918         dx = gsl_matrix_get(X, j, k);
2919         dy = gsl_matrix_get(UltVehiY, i, k);
2920         d += dy * dx / (delta * dl + 1.0);
2921       }
2922       gsl_vector_set(XHiy, j * d_size + i, d);
2923     }
2924   }
2925 
2926   gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
2927 
2928   // Multiply beta by UltVeh and save to B.
2929   for (size_t i = 0; i < c_size; i++) {
2930     gsl_vector_view B_col = gsl_matrix_column(B, i);
2931     gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
2932     gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
2933                    &B_col.vector);
2934   }
2935 
2936   // Free memory.
2937   gsl_matrix_free(UltVehiY);
2938 
2939   gsl_vector_free(D_l);
2940   gsl_matrix_free(UltVeh);
2941   gsl_matrix_free(UltVehi);
2942   gsl_matrix_free(Qi);
2943   gsl_vector_free(XHiy);
2944   gsl_vector_free(beta);
2945 
2946   return;
2947 }
2948 
2949 // p-value correction
2950 // mode=1 Wald; mode=2 LRT; mode=3 SCORE;
PCRT(const size_t mode,const size_t d_size,const double p_value,const double crt_a,const double crt_b,const double crt_c)2951 double PCRT(const size_t mode, const size_t d_size, const double p_value,
2952             const double crt_a, const double crt_b, const double crt_c) {
2953   double p_crt = 0.0, chisq_crt = 0.0, q = (double)d_size;
2954   double chisq = gsl_cdf_chisq_Qinv(p_value, (double)d_size);
2955 
2956   if (mode == 1) {
2957     double a = crt_c / (2.0 * q * (q + 2.0));
2958     double b = 1.0 + (crt_a + crt_b) / (2.0 * q);
2959     chisq_crt = (-1.0 * b + safe_sqrt(b * b + 4.0 * a * chisq)) / (2.0 * a);
2960   } else if (mode == 2) {
2961     chisq_crt = chisq / (1.0 + crt_a / (2.0 * q));
2962   } else {
2963     chisq_crt = chisq;
2964   }
2965 
2966   p_crt = gsl_cdf_chisq_Q(chisq_crt, (double)d_size);
2967 
2968   return p_crt;
2969 }
2970 
AnalyzeBimbam(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_matrix * UtY)2971 void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
2972                           const gsl_matrix *UtW, const gsl_matrix *UtY) {
2973   debug_msg("entering");
2974   igzstream infile(file_geno.c_str(), igzstream::in);
2975   if (!infile) {
2976     cout << "error reading genotype file:" << file_geno << endl;
2977     return;
2978   }
2979 
2980   clock_t time_start = clock();
2981   time_UtX = 0;
2982   time_opt = 0;
2983 
2984   string line;
2985   char *ch_ptr;
2986 
2987   double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
2988   double crt_a, crt_b, crt_c;
2989   int n_miss, c_phen;
2990   double geno, x_mean;
2991   size_t c = 0;
2992   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
2993 
2994   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
2995 
2996   // Create a large matrix.
2997   size_t msize = LMM_BATCH_SIZE;
2998   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
2999   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
3000   gsl_matrix_set_zero(Xlarge);
3001 
3002   // Large matrices for EM.
3003   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
3004   gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
3005   gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
3006   gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
3007   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
3008   gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
3009   gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
3010   gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
3011 
3012   // Large matrices for NR.
3013   // Each dxd block is H_k^{-1}.
3014   gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
3015 
3016   // Each column is H_k^{-1}y_k.
3017   gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
3018 
3019   // Each dcxdc block is x_k \otimes H_k^{-1}.
3020   gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
3021   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
3022 
3023   gsl_vector *x = gsl_vector_alloc(n_size);
3024   gsl_vector *x_miss = gsl_vector_alloc(n_size);
3025 
3026   gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
3027   gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
3028   gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
3029   gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
3030   gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
3031   gsl_vector *beta = gsl_vector_alloc(d_size);
3032   gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
3033 
3034   // Null estimates for initial values.
3035   gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
3036   gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
3037   gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
3038   gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
3039 
3040   gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
3041   gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
3042   gsl_matrix_view xHi_all_sub =
3043       gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
3044 
3045   gsl_matrix_transpose_memcpy(Y, UtY);
3046 
3047   gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
3048 
3049   gsl_vector_view X_row = gsl_matrix_row(X, c_size);
3050   gsl_vector_set_zero(&X_row.vector);
3051   gsl_vector_view B_col = gsl_matrix_column(B, c_size);
3052   gsl_vector_set_zero(&B_col.vector);
3053 
3054   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
3055              l_max, n_region, V_g, V_e, &B_sub.matrix);
3056   logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
3057                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
3058                   V_e, &B_sub.matrix);
3059   logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
3060                   &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
3061                   crt_c);
3062   MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
3063               se_B_null);
3064 
3065   c = 0;
3066   Vg_remle_null.clear();
3067   Ve_remle_null.clear();
3068   for (size_t i = 0; i < d_size; i++) {
3069     for (size_t j = i; j < d_size; j++) {
3070       Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
3071       Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
3072       VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
3073       VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
3074       c++;
3075     }
3076   }
3077   beta_remle_null.clear();
3078   se_beta_remle_null.clear();
3079   for (size_t i = 0; i < se_B_null->size1; i++) {
3080     for (size_t j = 0; j < se_B_null->size2; j++) {
3081       beta_remle_null.push_back(gsl_matrix_get(B, i, j));
3082       se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
3083     }
3084   }
3085   logl_remle_H0 = logl_H0;
3086 
3087   cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
3088   cout.precision(4);
3089 
3090   cout << "REMLE estimate for Vg in the null model: " << endl;
3091   for (size_t i = 0; i < d_size; i++) {
3092     for (size_t j = 0; j <= i; j++) {
3093       cout << gsl_matrix_get(V_g, i, j) << "\t";
3094     }
3095     cout << endl;
3096   }
3097   cout << "se(Vg): " << endl;
3098   for (size_t i = 0; i < d_size; i++) {
3099     for (size_t j = 0; j <= i; j++) {
3100       c = GetIndex(i, j, d_size);
3101       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
3102     }
3103     cout << endl;
3104   }
3105   cout << "REMLE estimate for Ve in the null model: " << endl;
3106   for (size_t i = 0; i < d_size; i++) {
3107     for (size_t j = 0; j <= i; j++) {
3108       cout << gsl_matrix_get(V_e, i, j) << "\t";
3109     }
3110     cout << endl;
3111   }
3112   cout << "se(Ve): " << endl;
3113   for (size_t i = 0; i < d_size; i++) {
3114     for (size_t j = 0; j <= i; j++) {
3115       c = GetIndex(i, j, d_size);
3116       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
3117     }
3118     cout << endl;
3119   }
3120   cout << "REMLE likelihood = " << logl_H0 << endl;
3121 
3122   logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
3123                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
3124                   V_e, &B_sub.matrix);
3125   logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
3126                   &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
3127                   crt_c);
3128   MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
3129               se_B_null);
3130 
3131   c = 0;
3132   Vg_mle_null.clear();
3133   Ve_mle_null.clear();
3134   for (size_t i = 0; i < d_size; i++) {
3135     for (size_t j = i; j < d_size; j++) {
3136       Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
3137       Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
3138       VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
3139       VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
3140       c++;
3141     }
3142   }
3143   beta_mle_null.clear();
3144   se_beta_mle_null.clear();
3145   for (size_t i = 0; i < se_B_null->size1; i++) {
3146     for (size_t j = 0; j < se_B_null->size2; j++) {
3147       beta_mle_null.push_back(gsl_matrix_get(B, i, j));
3148       se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
3149     }
3150   }
3151   logl_mle_H0 = logl_H0;
3152 
3153   cout << "MLE estimate for Vg in the null model: " << endl;
3154   for (size_t i = 0; i < d_size; i++) {
3155     for (size_t j = 0; j <= i; j++) {
3156       cout << gsl_matrix_get(V_g, i, j) << "\t";
3157     }
3158     cout << endl;
3159   }
3160   cout << "se(Vg): " << endl;
3161   for (size_t i = 0; i < d_size; i++) {
3162     for (size_t j = 0; j <= i; j++) {
3163       c = GetIndex(i, j, d_size);
3164       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
3165     }
3166     cout << endl;
3167   }
3168   cout << "MLE estimate for Ve in the null model: " << endl;
3169   for (size_t i = 0; i < d_size; i++) {
3170     for (size_t j = 0; j <= i; j++) {
3171       cout << gsl_matrix_get(V_e, i, j) << "\t";
3172     }
3173     cout << endl;
3174   }
3175   cout << "se(Ve): " << endl;
3176   for (size_t i = 0; i < d_size; i++) {
3177     for (size_t j = 0; j <= i; j++) {
3178       c = GetIndex(i, j, d_size);
3179       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
3180     }
3181     cout << endl;
3182   }
3183   cout << "MLE likelihood = " << logl_H0 << endl;
3184 
3185   vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
3186   for (size_t i = 0; i < d_size; i++) {
3187     v_beta.push_back(0.0);
3188   }
3189   for (size_t i = 0; i < d_size; i++) {
3190     for (size_t j = i; j < d_size; j++) {
3191       v_Vg.push_back(0.0);
3192       v_Ve.push_back(0.0);
3193       v_Vbeta.push_back(0.0);
3194     }
3195   }
3196 
3197   gsl_matrix_memcpy(V_g_null, V_g);
3198   gsl_matrix_memcpy(V_e_null, V_e);
3199   gsl_matrix_memcpy(B_null, B);
3200 
3201   // Start reading genotypes and analyze.
3202   size_t csnp = 0, t_last = 0;
3203   for (size_t t = 0; t < indicator_snp.size(); ++t) {
3204     if (indicator_snp[t] == 0) {
3205       continue;
3206     }
3207     t_last++;
3208   }
3209   for (size_t t = 0; t < indicator_snp.size(); ++t) {
3210     safeGetline(infile, line).eof();
3211     if (t % d_pace == 0 || t == (ns_total - 1)) {
3212       ProgressBar("Reading SNPs", t, ns_total - 1);
3213     }
3214     if (indicator_snp[t] == 0) {
3215       continue;
3216     }
3217 
3218     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
3219     ch_ptr = strtok_safe(NULL, " , \t");
3220     ch_ptr = strtok_safe(NULL, " , \t");
3221 
3222     x_mean = 0.0;
3223     c_phen = 0;
3224     n_miss = 0;
3225     gsl_vector_set_zero(x_miss);
3226     for (size_t i = 0; i < ni_total; ++i) {
3227       ch_ptr = strtok_safe(NULL, " , \t");
3228       if (indicator_idv[i] == 0) {
3229         continue;
3230       }
3231 
3232       if (strcmp(ch_ptr, "NA") == 0) {
3233         gsl_vector_set(x_miss, c_phen, 0.0);
3234         n_miss++;
3235       } else {
3236         geno = atof(ch_ptr);
3237 
3238         gsl_vector_set(x, c_phen, geno);
3239         gsl_vector_set(x_miss, c_phen, 1.0);
3240         x_mean += geno;
3241       }
3242       c_phen++;
3243     }
3244 
3245     x_mean /= (double)(ni_test - n_miss);
3246 
3247     for (size_t i = 0; i < ni_test; ++i) {
3248       if (gsl_vector_get(x_miss, i) == 0) {
3249         gsl_vector_set(x, i, x_mean);
3250       }
3251       geno = gsl_vector_get(x, i);
3252     }
3253 
3254     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
3255     gsl_vector_memcpy(&Xlarge_col.vector, x);
3256     csnp++;
3257 
3258     if (csnp % msize == 0 || csnp == t_last) {
3259       size_t l = 0;
3260       if (csnp % msize == 0) {
3261         l = msize;
3262       } else {
3263         l = csnp % msize;
3264       }
3265 
3266       gsl_matrix_view Xlarge_sub =
3267           gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
3268       gsl_matrix_view UtXlarge_sub =
3269           gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
3270 
3271       time_start = clock();
3272       fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
3273                  &UtXlarge_sub.matrix);
3274       time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3275 
3276       gsl_matrix_set_zero(Xlarge);
3277 
3278       for (size_t i = 0; i < l; i++) {
3279         gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
3280         gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
3281 
3282         // Initial values.
3283         gsl_matrix_memcpy(V_g, V_g_null);
3284         gsl_matrix_memcpy(V_e, V_e_null);
3285         gsl_matrix_memcpy(B, B_null);
3286 
3287         time_start = clock();
3288 
3289         // 3 is before 1.
3290         if (a_mode == 3 || a_mode == 4) {
3291           p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
3292                              V_e_null, UltVehiY, beta, Vbeta);
3293           if (p_score < p_nr && crt == 1) {
3294             logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
3295                             Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3296             p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
3297           }
3298         }
3299 
3300         if (a_mode == 2 || a_mode == 4) {
3301           logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
3302                           E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
3303                           UltVehiE, V_g, V_e, B);
3304 
3305           // Calculate beta and Vbeta.
3306           p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3307                            UltVehiY, beta, Vbeta);
3308           p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
3309 
3310           if (p_lrt < p_nr) {
3311             logl_H1 =
3312                 MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
3313                       xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3314 
3315             // Calculate beta and Vbeta.
3316             p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3317                              UltVehiY, beta, Vbeta);
3318             p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
3319 
3320             if (crt == 1) {
3321               p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
3322             }
3323           }
3324         }
3325 
3326         if (a_mode == 1 || a_mode == 4) {
3327           logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
3328                           E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
3329                           UltVehiE, V_g, V_e, B);
3330           p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3331                             UltVehiY, beta, Vbeta);
3332 
3333           if (p_wald < p_nr) {
3334             logl_H1 =
3335                 MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
3336                       xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3337             p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3338                               UltVehiY, beta, Vbeta);
3339 
3340             if (crt == 1) {
3341               p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
3342             }
3343           }
3344         }
3345 
3346         time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3347 
3348         // Store summary data.
3349         for (size_t i = 0; i < d_size; i++) {
3350           v_beta[i] = gsl_vector_get(beta, i);
3351         }
3352 
3353         c = 0;
3354         for (size_t i = 0; i < d_size; i++) {
3355           for (size_t j = i; j < d_size; j++) {
3356             v_Vg[c] = gsl_matrix_get(V_g, i, j);
3357             v_Ve[c] = gsl_matrix_get(V_e, i, j);
3358             v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
3359             c++;
3360           }
3361         }
3362 
3363         MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
3364         sumStat.push_back(SNPs);
3365       }
3366     }
3367   }
3368   cout << endl;
3369 
3370   infile.close();
3371   infile.clear();
3372 
3373   gsl_matrix_free(U_hat);
3374   gsl_matrix_free(E_hat);
3375   gsl_matrix_free(OmegaU);
3376   gsl_matrix_free(OmegaE);
3377   gsl_matrix_free(UltVehiY);
3378   gsl_matrix_free(UltVehiBX);
3379   gsl_matrix_free(UltVehiU);
3380   gsl_matrix_free(UltVehiE);
3381 
3382   gsl_matrix_free(Hi_all);
3383   gsl_matrix_free(Hiy_all);
3384   gsl_matrix_free(xHi_all);
3385   gsl_matrix_free(Hessian);
3386 
3387   gsl_vector_free(x);
3388   gsl_vector_free(x_miss);
3389 
3390   gsl_matrix_free(Y);
3391   gsl_matrix_free(X);
3392   gsl_matrix_free(V_g);
3393   gsl_matrix_free(V_e);
3394   gsl_matrix_free(B);
3395   gsl_vector_free(beta);
3396   gsl_matrix_free(Vbeta);
3397 
3398   gsl_matrix_free(V_g_null);
3399   gsl_matrix_free(V_e_null);
3400   gsl_matrix_free(B_null);
3401   gsl_matrix_free(se_B_null);
3402 
3403   gsl_matrix_free(Xlarge);
3404   gsl_matrix_free(UtXlarge);
3405 
3406   return;
3407 }
3408 
AnalyzePlink(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_matrix * UtY)3409 void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
3410                          const gsl_matrix *UtW, const gsl_matrix *UtY) {
3411   debug_msg("entering");
3412   string file_bed = file_bfile + ".bed";
3413   ifstream infile(file_bed.c_str(), ios::binary);
3414   if (!infile) {
3415     cout << "error reading bed file:" << file_bed << endl;
3416     return;
3417   }
3418 
3419   clock_t time_start = clock();
3420   time_UtX = 0;
3421   time_opt = 0;
3422 
3423   char ch[1];
3424   bitset<8> b;
3425 
3426   double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
3427   double crt_a, crt_b, crt_c;
3428   int n_bit, n_miss, ci_total, ci_test;
3429   double geno, x_mean;
3430   size_t c = 0;
3431   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
3432   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
3433 
3434   // Create a large matrix.
3435   size_t msize = LMM_BATCH_SIZE;
3436   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
3437   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
3438   gsl_matrix_set_zero(Xlarge);
3439 
3440   // Large matrices for EM.
3441   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
3442   gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
3443   gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
3444   gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
3445   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
3446   gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
3447   gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
3448   gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
3449 
3450   // Large matrices for NR.
3451   // Each dxd block is H_k^{-1}.
3452   gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
3453 
3454   // Each column is H_k^{-1}y_k.
3455   gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
3456 
3457   // Each dcxdc block is x_k\otimes H_k^{-1}.
3458   gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
3459 
3460   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
3461 
3462   gsl_vector *x = gsl_vector_alloc(n_size);
3463 
3464   gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
3465   gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
3466   gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
3467   gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
3468   gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
3469   gsl_vector *beta = gsl_vector_alloc(d_size);
3470   gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
3471 
3472   // Null estimates for initial values.
3473   gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
3474   gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
3475   gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
3476   gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
3477 
3478   gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
3479   gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
3480   gsl_matrix_view xHi_all_sub =
3481       gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
3482 
3483   gsl_matrix_transpose_memcpy(Y, UtY);
3484   gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
3485 
3486   gsl_vector_view X_row = gsl_matrix_row(X, c_size);
3487   gsl_vector_set_zero(&X_row.vector);
3488   gsl_vector_view B_col = gsl_matrix_column(B, c_size);
3489   gsl_vector_set_zero(&B_col.vector);
3490 
3491   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
3492              l_max, n_region, V_g, V_e, &B_sub.matrix);
3493 
3494   write(eval,"eval4");
3495 
3496   logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
3497                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
3498                   V_e, &B_sub.matrix);
3499   logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
3500                   &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
3501                   crt_c);
3502   MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
3503               se_B_null);
3504 
3505   c = 0;
3506   Vg_remle_null.clear();
3507   Ve_remle_null.clear();
3508   for (size_t i = 0; i < d_size; i++) {
3509     for (size_t j = i; j < d_size; j++) {
3510       Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
3511       Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
3512       VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
3513       VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
3514       c++;
3515     }
3516   }
3517   beta_remle_null.clear();
3518   se_beta_remle_null.clear();
3519   for (size_t i = 0; i < se_B_null->size1; i++) {
3520     for (size_t j = 0; j < se_B_null->size2; j++) {
3521       beta_remle_null.push_back(gsl_matrix_get(B, i, j));
3522       se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
3523     }
3524   }
3525   logl_remle_H0 = logl_H0;
3526 
3527   cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
3528   cout.precision(4);
3529   cout << "REMLE estimate for Vg in the null model: " << endl;
3530   for (size_t i = 0; i < d_size; i++) {
3531     for (size_t j = 0; j <= i; j++) {
3532       cout << gsl_matrix_get(V_g, i, j) << "\t";
3533     }
3534     cout << endl;
3535   }
3536   cout << "se(Vg): " << endl;
3537   for (size_t i = 0; i < d_size; i++) {
3538     for (size_t j = 0; j <= i; j++) {
3539       c = GetIndex(i, j, d_size);
3540       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
3541     }
3542     cout << endl;
3543   }
3544   cout << "REMLE estimate for Ve in the null model: " << endl;
3545   for (size_t i = 0; i < d_size; i++) {
3546     for (size_t j = 0; j <= i; j++) {
3547       cout << gsl_matrix_get(V_e, i, j) << "\t";
3548     }
3549     cout << endl;
3550   }
3551   cout << "se(Ve): " << endl;
3552   for (size_t i = 0; i < d_size; i++) {
3553     for (size_t j = 0; j <= i; j++) {
3554       c = GetIndex(i, j, d_size);
3555       auto v = gsl_matrix_get(Hessian, c + v_size, c + v_size);
3556       if (is_strict_mode())
3557         enforce_msg(v >= 0,"se(Ve) is not valid");
3558 
3559       cout << safe_sqrt(v) << "\t";
3560     }
3561     cout << endl;
3562   }
3563   cout << "REMLE likelihood = " << logl_H0 << endl;
3564 
3565   logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
3566                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
3567                   V_e, &B_sub.matrix);
3568   logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
3569                   &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
3570                   crt_c);
3571   MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
3572               se_B_null);
3573 
3574   c = 0;
3575   Vg_mle_null.clear();
3576   Ve_mle_null.clear();
3577   for (size_t i = 0; i < d_size; i++) {
3578     for (size_t j = i; j < d_size; j++) {
3579       Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
3580       Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
3581       VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
3582       VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
3583       c++;
3584     }
3585   }
3586   beta_mle_null.clear();
3587   se_beta_mle_null.clear();
3588   for (size_t i = 0; i < se_B_null->size1; i++) {
3589     for (size_t j = 0; j < se_B_null->size2; j++) {
3590       beta_mle_null.push_back(gsl_matrix_get(B, i, j));
3591       se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
3592     }
3593   }
3594   logl_mle_H0 = logl_H0;
3595 
3596   cout << "MLE estimate for Vg in the null model: " << endl;
3597   for (size_t i = 0; i < d_size; i++) {
3598     for (size_t j = 0; j <= i; j++) {
3599       cout << gsl_matrix_get(V_g, i, j) << "\t";
3600     }
3601     cout << endl;
3602   }
3603   cout << "se(Vg): " << endl;
3604   for (size_t i = 0; i < d_size; i++) {
3605     for (size_t j = 0; j <= i; j++) {
3606       c = GetIndex(i, j, d_size);
3607       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
3608     }
3609     cout << endl;
3610   }
3611   cout << "MLE estimate for Ve in the null model: " << endl;
3612   for (size_t i = 0; i < d_size; i++) {
3613     for (size_t j = 0; j <= i; j++) {
3614       cout << gsl_matrix_get(V_e, i, j) << "\t";
3615     }
3616     cout << endl;
3617   }
3618   cout << "se(Ve): " << endl;
3619   for (size_t i = 0; i < d_size; i++) {
3620     for (size_t j = 0; j <= i; j++) {
3621       c = GetIndex(i, j, d_size);
3622       auto v = gsl_matrix_get(Hessian, c + v_size, c + v_size);
3623       if (is_strict_mode())
3624         enforce_msg(v >= 0,"se(Ve) is not valid");
3625       cout << safe_sqrt(v) << "\t";
3626     }
3627     cout << endl;
3628   }
3629   cout << "MLE likelihood = " << logl_H0 << endl;
3630 
3631   vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
3632   for (size_t i = 0; i < d_size; i++) {
3633     v_beta.push_back(0.0);
3634   }
3635   for (size_t i = 0; i < d_size; i++) {
3636     for (size_t j = i; j < d_size; j++) {
3637       v_Vg.push_back(0.0);
3638       v_Ve.push_back(0.0);
3639       v_Vbeta.push_back(0.0);
3640     }
3641   }
3642 
3643   gsl_matrix_memcpy(V_g_null, V_g);
3644   gsl_matrix_memcpy(V_e_null, V_e);
3645   gsl_matrix_memcpy(B_null, B);
3646 
3647   // Start reading genotypes and analyze.
3648   // Calculate n_bit and c, the number of bit for each snp.
3649   if (ni_total % 4 == 0) {
3650     n_bit = ni_total / 4;
3651   } else {
3652     n_bit = ni_total / 4 + 1;
3653   }
3654 
3655   // Print the first three magic numbers.
3656   for (int i = 0; i < 3; ++i) {
3657     infile.read(ch, 1);
3658     b = ch[0];
3659   }
3660 
3661   size_t csnp = 0, t_last = 0;
3662   for (size_t t = 0; t < indicator_snp.size(); ++t) {
3663     if (indicator_snp[t] == 0) {
3664       continue;
3665     }
3666     t_last++;
3667   }
3668   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
3669     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
3670       ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
3671     }
3672     if (indicator_snp[t] == 0) {
3673       continue;
3674     }
3675 
3676     // n_bit, and 3 is the number of magic numbers.
3677     infile.seekg(t * n_bit + 3);
3678 
3679     // read genotypes
3680     x_mean = 0.0;
3681     n_miss = 0;
3682     ci_total = 0;
3683     ci_test = 0;
3684     for (int i = 0; i < n_bit; ++i) {
3685       infile.read(ch, 1);
3686       b = ch[0];
3687 
3688       // Minor allele homozygous: 2.0; major: 0.0;
3689       for (size_t j = 0; j < 4; ++j) {
3690         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
3691           break;
3692         }
3693         if (indicator_idv[ci_total] == 0) {
3694           ci_total++;
3695           continue;
3696         }
3697 
3698         if (b[2 * j] == 0) {
3699           if (b[2 * j + 1] == 0) {
3700             gsl_vector_set(x, ci_test, 2);
3701             x_mean += 2.0;
3702           } else {
3703             gsl_vector_set(x, ci_test, 1);
3704             x_mean += 1.0;
3705           }
3706         } else {
3707           if (b[2 * j + 1] == 1) {
3708             gsl_vector_set(x, ci_test, 0);
3709           } else {
3710             gsl_vector_set(x, ci_test, -9);
3711             n_miss++;
3712           }
3713         }
3714 
3715         ci_total++;
3716         ci_test++;
3717       }
3718     }
3719 
3720     x_mean /= (double)(ni_test - n_miss);
3721 
3722     for (size_t i = 0; i < ni_test; ++i) {
3723       geno = gsl_vector_get(x, i);
3724       if (geno == -9) {
3725         gsl_vector_set(x, i, x_mean);
3726         geno = x_mean;
3727       }
3728     }
3729 
3730     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
3731     gsl_vector_memcpy(&Xlarge_col.vector, x);
3732     csnp++;
3733 
3734     if (csnp % msize == 0 || csnp == t_last) {
3735       size_t l = 0;
3736       if (csnp % msize == 0) {
3737         l = msize;
3738       } else {
3739         l = csnp % msize;
3740       }
3741 
3742       gsl_matrix_view Xlarge_sub =
3743           gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
3744       gsl_matrix_view UtXlarge_sub =
3745           gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
3746 
3747       time_start = clock();
3748       fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
3749                      &UtXlarge_sub.matrix);
3750       time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3751 
3752       gsl_matrix_set_zero(Xlarge);
3753 
3754       for (size_t i = 0; i < l; i++) {
3755         gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
3756         gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
3757 
3758         // Initial values.
3759         gsl_matrix_memcpy(V_g, V_g_null);
3760         gsl_matrix_memcpy(V_e, V_e_null);
3761         gsl_matrix_memcpy(B, B_null);
3762 
3763         time_start = clock();
3764 
3765         // 3 is before 1.
3766         if (a_mode == 3 || a_mode == 4) {
3767           p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
3768                              V_e_null, UltVehiY, beta, Vbeta);
3769 
3770           if (p_score < p_nr && crt == 1) {
3771             logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
3772                             Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3773             p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
3774           }
3775         }
3776 
3777         if (a_mode == 2 || a_mode == 4) {
3778           logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
3779                           E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
3780                           UltVehiE, V_g, V_e, B);
3781 
3782           // Calculate beta and Vbeta.
3783           p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3784                            UltVehiY, beta, Vbeta);
3785           p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
3786 
3787           if (p_lrt < p_nr) {
3788             logl_H1 =
3789                 MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
3790                       xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3791 
3792             // Calculate beta and Vbeta.
3793             p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3794                              UltVehiY, beta, Vbeta);
3795             p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
3796             if (crt == 1) {
3797               p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
3798             }
3799           }
3800         }
3801 
3802         if (a_mode == 1 || a_mode == 4) {
3803           logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
3804                           E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
3805                           UltVehiE, V_g, V_e, B);
3806           p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3807                             UltVehiY, beta, Vbeta);
3808 
3809           if (p_wald < p_nr) {
3810             logl_H1 =
3811                 MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
3812                       xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
3813             p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
3814                               UltVehiY, beta, Vbeta);
3815 
3816             if (crt == 1) {
3817               p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
3818             }
3819           }
3820         }
3821 
3822         time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3823 
3824         // Store summary data.
3825         for (size_t i = 0; i < d_size; i++) {
3826           v_beta[i] = gsl_vector_get(beta, i);
3827         }
3828 
3829         c = 0;
3830         for (size_t i = 0; i < d_size; i++) {
3831           for (size_t j = i; j < d_size; j++) {
3832             v_Vg[c] = gsl_matrix_get(V_g, i, j);
3833             v_Ve[c] = gsl_matrix_get(V_e, i, j);
3834             v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
3835             c++;
3836           }
3837         }
3838 
3839         MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
3840         sumStat.push_back(SNPs);
3841       }
3842     }
3843   }
3844   cout << endl;
3845 
3846   infile.close();
3847   infile.clear();
3848 
3849   gsl_matrix_free(U_hat);
3850   gsl_matrix_free(E_hat);
3851   gsl_matrix_free(OmegaU);
3852   gsl_matrix_free(OmegaE);
3853   gsl_matrix_free(UltVehiY);
3854   gsl_matrix_free(UltVehiBX);
3855   gsl_matrix_free(UltVehiU);
3856   gsl_matrix_free(UltVehiE);
3857 
3858   gsl_matrix_free(Hi_all);
3859   gsl_matrix_free(Hiy_all);
3860   gsl_matrix_free(xHi_all);
3861   gsl_matrix_free(Hessian);
3862 
3863   gsl_vector_free(x);
3864 
3865   gsl_matrix_free(Y);
3866   gsl_matrix_free(X);
3867   gsl_matrix_free(V_g);
3868   gsl_matrix_free(V_e);
3869   gsl_matrix_free(B);
3870   gsl_vector_free(beta);
3871   gsl_matrix_free(Vbeta);
3872 
3873   gsl_matrix_free(V_g_null);
3874   gsl_matrix_free(V_e_null);
3875   gsl_matrix_free(B_null);
3876   gsl_matrix_free(se_B_null);
3877 
3878   gsl_matrix_free(Xlarge);
3879   gsl_matrix_free(UtXlarge);
3880 
3881   return;
3882 }
3883 
3884 // Calculate Vg, Ve, B, se(B) in the null mvLMM model.
3885 // Both B and se_B are d by c matrices.
CalcMvLmmVgVeBeta(const gsl_vector * eval,const gsl_matrix * UtW,const gsl_matrix * UtY,const size_t em_iter,const size_t nr_iter,const double em_prec,const double nr_prec,const double l_min,const double l_max,const size_t n_region,gsl_matrix * V_g,gsl_matrix * V_e,gsl_matrix * B,gsl_matrix * se_B)3886 void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
3887                        const gsl_matrix *UtY, const size_t em_iter,
3888                        const size_t nr_iter, const double em_prec,
3889                        const double nr_prec, const double l_min,
3890                        const double l_max, const size_t n_region,
3891                        gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
3892                        gsl_matrix *se_B) {
3893   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
3894   size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2;
3895 
3896   double crt_a, crt_b, crt_c;
3897 
3898   // Large matrices for EM.
3899   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
3900   gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
3901   gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
3902   gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
3903   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
3904   gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
3905   gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
3906   gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
3907 
3908   // Large matrices for NR.
3909   // Each dxd block is H_k^{-1}.
3910   gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
3911 
3912   // Each column is H_k^{-1}y_k.
3913   gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
3914 
3915   // Each dcxdc block is x_k\otimes H_k^{-1}.
3916   gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
3917   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
3918 
3919   // Transpose matrices.
3920   gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
3921   gsl_matrix *W = gsl_matrix_alloc(c_size, n_size);
3922   gsl_matrix_transpose_memcpy(Y, UtY);
3923   gsl_matrix_transpose_memcpy(W, UtW);
3924 
3925   // Initial, EM, NR, and calculate B.
3926   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max,
3927              n_region, V_g, V_e, B);
3928   MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
3929         UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
3930   MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
3931         V_e, Hessian, crt_a, crt_b, crt_c);
3932   MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
3933 
3934   // Free matrices.
3935   gsl_matrix_free(U_hat);
3936   gsl_matrix_free(E_hat);
3937   gsl_matrix_free(OmegaU);
3938   gsl_matrix_free(OmegaE);
3939   gsl_matrix_free(UltVehiY);
3940   gsl_matrix_free(UltVehiBX);
3941   gsl_matrix_free(UltVehiU);
3942   gsl_matrix_free(UltVehiE);
3943 
3944   gsl_matrix_free(Hi_all);
3945   gsl_matrix_free(Hiy_all);
3946   gsl_matrix_free(xHi_all);
3947   gsl_matrix_free(Hessian);
3948 
3949   gsl_matrix_free(Y);
3950   gsl_matrix_free(W);
3951 
3952   return;
3953 }
3954 
AnalyzeBimbamGXE(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_matrix * UtY,const gsl_vector * env)3955 void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
3956                              const gsl_matrix *UtW, const gsl_matrix *UtY,
3957                              const gsl_vector *env) {
3958   debug_msg("entering");
3959   igzstream infile(file_geno.c_str(), igzstream::in);
3960   if (!infile) {
3961     cout << "error reading genotype file:" << file_geno << endl;
3962     return;
3963   }
3964 
3965   clock_t time_start = clock();
3966   time_UtX = 0;
3967   time_opt = 0;
3968 
3969   string line;
3970   char *ch_ptr;
3971 
3972   double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
3973   double crt_a, crt_b, crt_c;
3974   int n_miss, c_phen;
3975   double geno, x_mean;
3976   size_t c = 0;
3977   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
3978   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
3979 
3980   // Large matrices for EM.
3981   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
3982   gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
3983   gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
3984   gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
3985   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
3986   gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
3987   gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
3988   gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
3989 
3990   // Large matrices for NR.
3991   // Each dxd block is H_k^{-1}.
3992   gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
3993 
3994   // Each column is H_k^{-1}y_k.
3995   gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
3996 
3997   // Each dcxdc block is x_k\otimes H_k^{-1}.
3998   gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
3999   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
4000 
4001   gsl_vector *x = gsl_vector_alloc(n_size);
4002   gsl_vector *x_miss = gsl_vector_alloc(n_size);
4003 
4004   gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
4005   gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
4006   gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
4007   gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
4008   gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
4009   gsl_vector *beta = gsl_vector_alloc(d_size);
4010   gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
4011 
4012   // Null estimates for initial values; including env but not
4013   // including x.
4014   gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
4015   gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
4016   gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
4017   gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
4018   gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
4019 
4020   gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
4021   gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
4022   gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
4023       xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
4024 
4025   gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
4026   gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
4027   gsl_matrix_view xHi_all_sub2 =
4028       gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
4029 
4030   gsl_matrix_transpose_memcpy(Y, UtY);
4031 
4032   gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
4033   gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
4034   gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
4035   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
4036 
4037   gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
4038   gsl_vector_set_zero(&X_row1.vector);
4039   gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
4040   gsl_vector_set_zero(&X_row2.vector);
4041 
4042   gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
4043   gsl_vector_set_zero(&B_col1.vector);
4044   gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
4045   gsl_vector_set_zero(&B_col2.vector);
4046 
4047   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
4048              l_max, n_region, V_g, V_e, &B_sub1.matrix);
4049   logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
4050                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
4051                   V_e, &B_sub1.matrix);
4052   logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
4053                   &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
4054                   crt_b, crt_c);
4055   MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
4056               se_B_null1);
4057 
4058   c = 0;
4059   Vg_remle_null.clear();
4060   Ve_remle_null.clear();
4061   for (size_t i = 0; i < d_size; i++) {
4062     for (size_t j = i; j < d_size; j++) {
4063       Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
4064       Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
4065       VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
4066       VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
4067       c++;
4068     }
4069   }
4070   beta_remle_null.clear();
4071   se_beta_remle_null.clear();
4072   for (size_t i = 0; i < se_B_null1->size1; i++) {
4073     for (size_t j = 0; j < se_B_null1->size2; j++) {
4074       beta_remle_null.push_back(gsl_matrix_get(B, i, j));
4075       se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
4076     }
4077   }
4078   logl_remle_H0 = logl_H0;
4079 
4080   cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
4081   cout.precision(4);
4082 
4083   cout << "REMLE estimate for Vg in the null model: " << endl;
4084   for (size_t i = 0; i < d_size; i++) {
4085     for (size_t j = 0; j <= i; j++) {
4086       cout << gsl_matrix_get(V_g, i, j) << "\t";
4087     }
4088     cout << endl;
4089   }
4090   cout << "se(Vg): " << endl;
4091   for (size_t i = 0; i < d_size; i++) {
4092     for (size_t j = 0; j <= i; j++) {
4093       c = GetIndex(i, j, d_size);
4094       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
4095     }
4096     cout << endl;
4097   }
4098   cout << "REMLE estimate for Ve in the null model: " << endl;
4099   for (size_t i = 0; i < d_size; i++) {
4100     for (size_t j = 0; j <= i; j++) {
4101       cout << gsl_matrix_get(V_e, i, j) << "\t";
4102     }
4103     cout << endl;
4104   }
4105   cout << "se(Ve): " << endl;
4106   for (size_t i = 0; i < d_size; i++) {
4107     for (size_t j = 0; j <= i; j++) {
4108       c = GetIndex(i, j, d_size);
4109       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
4110     }
4111     cout << endl;
4112   }
4113   cout << "REMLE likelihood = " << logl_H0 << endl;
4114 
4115   logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
4116                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
4117                   V_e, &B_sub1.matrix);
4118   logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
4119                   &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
4120                   crt_b, crt_c);
4121   MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
4122               se_B_null1);
4123 
4124   c = 0;
4125   Vg_mle_null.clear();
4126   Ve_mle_null.clear();
4127   for (size_t i = 0; i < d_size; i++) {
4128     for (size_t j = i; j < d_size; j++) {
4129       Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
4130       Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
4131       VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
4132       VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
4133       c++;
4134     }
4135   }
4136   beta_mle_null.clear();
4137   se_beta_mle_null.clear();
4138   for (size_t i = 0; i < se_B_null1->size1; i++) {
4139     for (size_t j = 0; j < se_B_null1->size2; j++) {
4140       beta_mle_null.push_back(gsl_matrix_get(B, i, j));
4141       se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
4142     }
4143   }
4144   logl_mle_H0 = logl_H0;
4145 
4146   cout << "MLE estimate for Vg in the null model: " << endl;
4147   for (size_t i = 0; i < d_size; i++) {
4148     for (size_t j = 0; j <= i; j++) {
4149       cout << gsl_matrix_get(V_g, i, j) << "\t";
4150     }
4151     cout << endl;
4152   }
4153   cout << "se(Vg): " << endl;
4154   for (size_t i = 0; i < d_size; i++) {
4155     for (size_t j = 0; j <= i; j++) {
4156       c = GetIndex(i, j, d_size);
4157       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
4158     }
4159     cout << endl;
4160   }
4161   cout << "MLE estimate for Ve in the null model: " << endl;
4162   for (size_t i = 0; i < d_size; i++) {
4163     for (size_t j = 0; j <= i; j++) {
4164       cout << gsl_matrix_get(V_e, i, j) << "\t";
4165     }
4166     cout << endl;
4167   }
4168   cout << "se(Ve): " << endl;
4169   for (size_t i = 0; i < d_size; i++) {
4170     for (size_t j = 0; j <= i; j++) {
4171       c = GetIndex(i, j, d_size);
4172       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
4173     }
4174     cout << endl;
4175   }
4176   cout << "MLE likelihood = " << logl_H0 << endl;
4177 
4178   vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
4179   for (size_t i = 0; i < d_size; i++) {
4180     v_beta.push_back(0.0);
4181   }
4182   for (size_t i = 0; i < d_size; i++) {
4183     for (size_t j = i; j < d_size; j++) {
4184       v_Vg.push_back(0.0);
4185       v_Ve.push_back(0.0);
4186       v_Vbeta.push_back(0.0);
4187     }
4188   }
4189 
4190   gsl_matrix_memcpy(V_g_null, V_g);
4191   gsl_matrix_memcpy(V_e_null, V_e);
4192   gsl_matrix_memcpy(B_null, B);
4193 
4194   // Start reading genotypes and analyze.
4195   for (size_t t = 0; t < indicator_snp.size(); ++t) {
4196     safeGetline(infile, line).eof();
4197     if (t % d_pace == 0 || t == (ns_total - 1)) {
4198       ProgressBar("Reading SNPs", t, ns_total - 1);
4199     }
4200     if (indicator_snp[t] == 0) {
4201       continue;
4202     }
4203 
4204     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
4205     ch_ptr = strtok_safe(NULL, " , \t");
4206     ch_ptr = strtok_safe(NULL, " , \t");
4207 
4208     x_mean = 0.0;
4209     c_phen = 0;
4210     n_miss = 0;
4211     gsl_vector_set_zero(x_miss);
4212     for (size_t i = 0; i < ni_total; ++i) {
4213       ch_ptr = strtok_safe(NULL, " , \t");
4214       if (indicator_idv[i] == 0) {
4215         continue;
4216       }
4217 
4218       if (strcmp(ch_ptr, "NA") == 0) {
4219         gsl_vector_set(x_miss, c_phen, 0.0);
4220         n_miss++;
4221       } else {
4222         geno = atof(ch_ptr);
4223 
4224         gsl_vector_set(x, c_phen, geno);
4225         gsl_vector_set(x_miss, c_phen, 1.0);
4226         x_mean += geno;
4227       }
4228       c_phen++;
4229     }
4230 
4231     x_mean /= (double)(ni_test - n_miss);
4232 
4233     for (size_t i = 0; i < ni_test; ++i) {
4234       if (gsl_vector_get(x_miss, i) == 0) {
4235         gsl_vector_set(x, i, x_mean);
4236       }
4237       geno = gsl_vector_get(x, i);
4238       if (x_mean > 1) {
4239         gsl_vector_set(x, i, 2 - geno);
4240       }
4241     }
4242 
4243     // Calculate statistics.
4244     time_start = clock();
4245     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
4246     gsl_vector_mul(x, env);
4247     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
4248     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
4249 
4250     // initial values
4251     gsl_matrix_memcpy(V_g, V_g_null);
4252     gsl_matrix_memcpy(V_e, V_e_null);
4253     gsl_matrix_memcpy(B, B_null);
4254 
4255     if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
4256       if (a_mode == 3 || a_mode == 4) {
4257         logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
4258                         Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
4259                         UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
4260         logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
4261                         Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
4262                         Hessian, crt_a, crt_b, crt_c);
4263         MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
4264                     se_B_null2);
4265       }
4266 
4267       if (a_mode == 2 || a_mode == 4) {
4268         logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
4269                         Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
4270                         UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
4271         logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
4272                         Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
4273                         Hessian, crt_a, crt_b, crt_c);
4274         MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
4275                     se_B_null2);
4276       }
4277     }
4278 
4279     time_start = clock();
4280 
4281     // 3 is before 1.
4282     if (a_mode == 3 || a_mode == 4) {
4283       p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
4284                          V_e_null, UltVehiY, beta, Vbeta);
4285       if (p_score < p_nr && crt == 1) {
4286         logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4287                         Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4288         p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
4289       }
4290     }
4291 
4292     if (a_mode == 2 || a_mode == 4) {
4293       logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
4294                       OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
4295                       V_g, V_e, B);
4296 
4297       // Calculate beta and Vbeta.
4298       p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4299                        UltVehiY, beta, Vbeta);
4300       p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
4301 
4302       if (p_lrt < p_nr) {
4303         logl_H1 =
4304             MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4305                   Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4306 
4307         // Calculate beta and Vbeta.
4308         p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4309                          UltVehiY, beta, Vbeta);
4310         p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
4311 
4312         if (crt == 1) {
4313           p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
4314         }
4315       }
4316     }
4317 
4318     if (a_mode == 1 || a_mode == 4) {
4319       logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
4320                       OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
4321                       V_g, V_e, B);
4322       p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4323                         UltVehiY, beta, Vbeta);
4324 
4325       if (p_wald < p_nr) {
4326         logl_H1 =
4327             MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4328                   Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4329         p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4330                           UltVehiY, beta, Vbeta);
4331 
4332         if (crt == 1) {
4333           p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
4334         }
4335       }
4336     }
4337 
4338     if (x_mean > 1) {
4339       gsl_vector_scale(beta, -1.0);
4340     }
4341 
4342     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
4343 
4344     // Store summary data.
4345     for (size_t i = 0; i < d_size; i++) {
4346       v_beta[i] = gsl_vector_get(beta, i);
4347     }
4348 
4349     c = 0;
4350     for (size_t i = 0; i < d_size; i++) {
4351       for (size_t j = i; j < d_size; j++) {
4352         v_Vg[c] = gsl_matrix_get(V_g, i, j);
4353         v_Ve[c] = gsl_matrix_get(V_e, i, j);
4354         v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
4355         c++;
4356       }
4357     }
4358 
4359     MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
4360     sumStat.push_back(SNPs);
4361   }
4362   cout << endl;
4363 
4364   infile.close();
4365   infile.clear();
4366 
4367   gsl_matrix_free(U_hat);
4368   gsl_matrix_free(E_hat);
4369   gsl_matrix_free(OmegaU);
4370   gsl_matrix_free(OmegaE);
4371   gsl_matrix_free(UltVehiY);
4372   gsl_matrix_free(UltVehiBX);
4373   gsl_matrix_free(UltVehiU);
4374   gsl_matrix_free(UltVehiE);
4375 
4376   gsl_matrix_free(Hi_all);
4377   gsl_matrix_free(Hiy_all);
4378   gsl_matrix_free(xHi_all);
4379   gsl_matrix_free(Hessian);
4380 
4381   gsl_vector_free(x);
4382   gsl_vector_free(x_miss);
4383 
4384   gsl_matrix_free(Y);
4385   gsl_matrix_free(X);
4386   gsl_matrix_free(V_g);
4387   gsl_matrix_free(V_e);
4388   gsl_matrix_free(B);
4389   gsl_vector_free(beta);
4390   gsl_matrix_free(Vbeta);
4391 
4392   gsl_matrix_free(V_g_null);
4393   gsl_matrix_free(V_e_null);
4394   gsl_matrix_free(B_null);
4395   gsl_matrix_free(se_B_null1);
4396   gsl_matrix_free(se_B_null2);
4397 
4398   return;
4399 }
4400 
AnalyzePlinkGXE(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_matrix * UtY,const gsl_vector * env)4401 void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
4402                             const gsl_matrix *UtW, const gsl_matrix *UtY,
4403                             const gsl_vector *env) {
4404   debug_msg("entering");
4405   string file_bed = file_bfile + ".bed";
4406   ifstream infile(file_bed.c_str(), ios::binary);
4407   if (!infile) {
4408     cout << "error reading bed file:" << file_bed << endl;
4409     return;
4410   }
4411 
4412   clock_t time_start = clock();
4413   time_UtX = 0;
4414   time_opt = 0;
4415 
4416   char ch[1];
4417   bitset<8> b;
4418 
4419   double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
4420   double crt_a, crt_b, crt_c;
4421   int n_bit, n_miss, ci_total, ci_test;
4422   double geno, x_mean;
4423   size_t c = 0;
4424   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
4425   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
4426 
4427   // Large matrices for EM.
4428   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
4429   gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
4430   gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
4431   gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
4432   gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
4433   gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
4434   gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
4435   gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
4436 
4437   // Large matrices for NR.
4438   // Each dxd block is H_k^{-1}.
4439   gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
4440 
4441   // Each column is H_k^{-1}y_k
4442   gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
4443 
4444   // Each dcxdc block is x_k\otimes H_k^{-1}.
4445   gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
4446   gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
4447 
4448   gsl_vector *x = gsl_vector_alloc(n_size);
4449 
4450   gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
4451   gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
4452   gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
4453   gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
4454   gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
4455   gsl_vector *beta = gsl_vector_alloc(d_size);
4456   gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
4457 
4458   // Null estimates for initial values.
4459   gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
4460   gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
4461   gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
4462   gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
4463   gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
4464 
4465   gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
4466   gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
4467   gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
4468       xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
4469 
4470   gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
4471   gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
4472   gsl_matrix_view xHi_all_sub2 =
4473       gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
4474 
4475   gsl_matrix_transpose_memcpy(Y, UtY);
4476 
4477   gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
4478   gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
4479   gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
4480   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
4481 
4482   gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
4483   gsl_vector_set_zero(&X_row1.vector);
4484   gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
4485   gsl_vector_set_zero(&X_row2.vector);
4486 
4487   gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
4488   gsl_vector_set_zero(&B_col1.vector);
4489   gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
4490   gsl_vector_set_zero(&B_col2.vector);
4491 
4492   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
4493              l_max, n_region, V_g, V_e, &B_sub1.matrix);
4494 
4495   logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
4496                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
4497                   V_e, &B_sub1.matrix);
4498   logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
4499                   &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
4500                   crt_b, crt_c);
4501   MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
4502               se_B_null1);
4503 
4504   c = 0;
4505   Vg_remle_null.clear();
4506   Ve_remle_null.clear();
4507   for (size_t i = 0; i < d_size; i++) {
4508     for (size_t j = i; j < d_size; j++) {
4509       Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
4510       Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
4511       VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
4512       VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
4513       c++;
4514     }
4515   }
4516   beta_remle_null.clear();
4517   se_beta_remle_null.clear();
4518   for (size_t i = 0; i < se_B_null1->size1; i++) {
4519     for (size_t j = 0; j < se_B_null1->size2; j++) {
4520       beta_remle_null.push_back(gsl_matrix_get(B, i, j));
4521       se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
4522     }
4523   }
4524   logl_remle_H0 = logl_H0;
4525 
4526   cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
4527   cout.precision(4);
4528   cout << "REMLE estimate for Vg in the null model: " << endl;
4529   for (size_t i = 0; i < d_size; i++) {
4530     for (size_t j = 0; j <= i; j++) {
4531       cout << gsl_matrix_get(V_g, i, j) << "\t";
4532     }
4533     cout << endl;
4534   }
4535   cout << "se(Vg): " << endl;
4536   for (size_t i = 0; i < d_size; i++) {
4537     for (size_t j = 0; j <= i; j++) {
4538       c = GetIndex(i, j, d_size);
4539       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
4540     }
4541     cout << endl;
4542   }
4543   cout << "REMLE estimate for Ve in the null model: " << endl;
4544   for (size_t i = 0; i < d_size; i++) {
4545     for (size_t j = 0; j <= i; j++) {
4546       cout << gsl_matrix_get(V_e, i, j) << "\t";
4547     }
4548     cout << endl;
4549   }
4550   cout << "se(Ve): " << endl;
4551   for (size_t i = 0; i < d_size; i++) {
4552     for (size_t j = 0; j <= i; j++) {
4553       c = GetIndex(i, j, d_size);
4554       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
4555     }
4556     cout << endl;
4557   }
4558   cout << "REMLE likelihood = " << logl_H0 << endl;
4559 
4560   logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
4561                   OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
4562                   V_e, &B_sub1.matrix);
4563   logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
4564                   &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
4565                   crt_b, crt_c);
4566   MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
4567               se_B_null1);
4568 
4569   c = 0;
4570   Vg_mle_null.clear();
4571   Ve_mle_null.clear();
4572   for (size_t i = 0; i < d_size; i++) {
4573     for (size_t j = i; j < d_size; j++) {
4574       Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
4575       Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
4576       VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
4577       VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
4578       c++;
4579     }
4580   }
4581   beta_mle_null.clear();
4582   se_beta_mle_null.clear();
4583   for (size_t i = 0; i < se_B_null1->size1; i++) {
4584     for (size_t j = 0; j < se_B_null1->size2; j++) {
4585       beta_mle_null.push_back(gsl_matrix_get(B, i, j));
4586       se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
4587     }
4588   }
4589   logl_mle_H0 = logl_H0;
4590 
4591   cout << "MLE estimate for Vg in the null model: " << endl;
4592   for (size_t i = 0; i < d_size; i++) {
4593     for (size_t j = 0; j <= i; j++) {
4594       cout << gsl_matrix_get(V_g, i, j) << "\t";
4595     }
4596     cout << endl;
4597   }
4598   cout << "se(Vg): " << endl;
4599   for (size_t i = 0; i < d_size; i++) {
4600     for (size_t j = 0; j <= i; j++) {
4601       c = GetIndex(i, j, d_size);
4602       cout << safe_sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
4603     }
4604     cout << endl;
4605   }
4606   cout << "MLE estimate for Ve in the null model: " << endl;
4607   for (size_t i = 0; i < d_size; i++) {
4608     for (size_t j = 0; j <= i; j++) {
4609       cout << gsl_matrix_get(V_e, i, j) << "\t";
4610     }
4611     cout << endl;
4612   }
4613   cout << "se(Ve): " << endl;
4614   for (size_t i = 0; i < d_size; i++) {
4615     for (size_t j = 0; j <= i; j++) {
4616       c = GetIndex(i, j, d_size);
4617       cout << safe_sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
4618     }
4619     cout << endl;
4620   }
4621   cout << "MLE likelihood = " << logl_H0 << endl;
4622 
4623   vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
4624   for (size_t i = 0; i < d_size; i++) {
4625     v_beta.push_back(0.0);
4626   }
4627   for (size_t i = 0; i < d_size; i++) {
4628     for (size_t j = i; j < d_size; j++) {
4629       v_Vg.push_back(0.0);
4630       v_Ve.push_back(0.0);
4631       v_Vbeta.push_back(0.0);
4632     }
4633   }
4634 
4635   gsl_matrix_memcpy(V_g_null, V_g);
4636   gsl_matrix_memcpy(V_e_null, V_e);
4637   gsl_matrix_memcpy(B_null, B);
4638 
4639   // Start reading genotypes and analyze.
4640   // Calculate n_bit and c, the number of bit for each SNP.
4641   if (ni_total % 4 == 0) {
4642     n_bit = ni_total / 4;
4643   } else {
4644     n_bit = ni_total / 4 + 1;
4645   }
4646 
4647   // Print the first three magic numbers.
4648   for (int i = 0; i < 3; ++i) {
4649     infile.read(ch, 1);
4650     b = ch[0];
4651   }
4652 
4653   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
4654     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
4655       ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
4656     }
4657     if (indicator_snp[t] == 0) {
4658       continue;
4659     }
4660 
4661     // n_bit, and 3 is the number of magic numbers.
4662     infile.seekg(t * n_bit + 3);
4663 
4664     // Read genotypes.
4665     x_mean = 0.0;
4666     n_miss = 0;
4667     ci_total = 0;
4668     ci_test = 0;
4669     for (int i = 0; i < n_bit; ++i) {
4670       infile.read(ch, 1);
4671       b = ch[0];
4672 
4673       // Minor allele homozygous: 2.0; major: 0.0.
4674       for (size_t j = 0; j < 4; ++j) {
4675 
4676         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
4677           break;
4678         }
4679         if (indicator_idv[ci_total] == 0) {
4680           ci_total++;
4681           continue;
4682         }
4683 
4684         if (b[2 * j] == 0) {
4685           if (b[2 * j + 1] == 0) {
4686             gsl_vector_set(x, ci_test, 2);
4687             x_mean += 2.0;
4688           } else {
4689             gsl_vector_set(x, ci_test, 1);
4690             x_mean += 1.0;
4691           }
4692         } else {
4693           if (b[2 * j + 1] == 1) {
4694             gsl_vector_set(x, ci_test, 0);
4695           } else {
4696             gsl_vector_set(x, ci_test, -9);
4697             n_miss++;
4698           }
4699         }
4700 
4701         ci_total++;
4702         ci_test++;
4703       }
4704     }
4705 
4706     x_mean /= (double)(ni_test - n_miss);
4707 
4708     for (size_t i = 0; i < ni_test; ++i) {
4709       geno = gsl_vector_get(x, i);
4710       if (geno == -9) {
4711         gsl_vector_set(x, i, x_mean);
4712         geno = x_mean;
4713       }
4714       if (x_mean > 1) {
4715         gsl_vector_set(x, i, 2 - geno);
4716       }
4717     }
4718 
4719     // Calculate statistics.
4720     time_start = clock();
4721     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
4722     gsl_vector_mul(x, env);
4723     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
4724     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
4725 
4726     // Initial values.
4727     gsl_matrix_memcpy(V_g, V_g_null);
4728     gsl_matrix_memcpy(V_e, V_e_null);
4729     gsl_matrix_memcpy(B, B_null);
4730 
4731     if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
4732       if (a_mode == 3 || a_mode == 4) {
4733         logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
4734                         Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
4735                         UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
4736         logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
4737                         Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
4738                         Hessian, crt_a, crt_b, crt_c);
4739         MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
4740                     se_B_null2);
4741       }
4742 
4743       if (a_mode == 2 || a_mode == 4) {
4744         logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
4745                         Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
4746                         UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
4747         logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
4748                         Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
4749                         Hessian, crt_a, crt_b, crt_c);
4750         MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
4751                     se_B_null2);
4752       }
4753     }
4754 
4755     time_start = clock();
4756 
4757     // 3 is before 1.
4758     if (a_mode == 3 || a_mode == 4) {
4759       p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
4760                          V_e_null, UltVehiY, beta, Vbeta);
4761 
4762       if (p_score < p_nr && crt == 1) {
4763         logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4764                         Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4765         p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
4766       }
4767     }
4768 
4769     if (a_mode == 2 || a_mode == 4) {
4770       logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
4771                       OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
4772                       V_g, V_e, B);
4773 
4774       // Calculate beta and Vbeta.
4775       p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4776                        UltVehiY, beta, Vbeta);
4777       p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
4778 
4779       if (p_lrt < p_nr) {
4780         logl_H1 =
4781             MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4782                   Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4783 
4784         // Calculate beta and Vbeta.
4785         p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4786                          UltVehiY, beta, Vbeta);
4787         p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
4788         if (crt == 1) {
4789           p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
4790         }
4791       }
4792     }
4793 
4794     if (a_mode == 1 || a_mode == 4) {
4795       logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
4796                       OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
4797                       V_g, V_e, B);
4798       p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4799                         UltVehiY, beta, Vbeta);
4800 
4801       if (p_wald < p_nr) {
4802         logl_H1 =
4803             MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
4804                   Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
4805         p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
4806                           UltVehiY, beta, Vbeta);
4807 
4808         if (crt == 1) {
4809           p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
4810         }
4811       }
4812     }
4813 
4814     if (x_mean > 1) {
4815       gsl_vector_scale(beta, -1.0);
4816     }
4817 
4818     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
4819 
4820     // Store summary data.
4821     for (size_t i = 0; i < d_size; i++) {
4822       v_beta[i] = gsl_vector_get(beta, i);
4823     }
4824 
4825     c = 0;
4826     for (size_t i = 0; i < d_size; i++) {
4827       for (size_t j = i; j < d_size; j++) {
4828         v_Vg[c] = gsl_matrix_get(V_g, i, j);
4829         v_Ve[c] = gsl_matrix_get(V_e, i, j);
4830         v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
4831         c++;
4832       }
4833     }
4834 
4835     MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
4836     sumStat.push_back(SNPs);
4837   }
4838   cout << endl;
4839 
4840   infile.close();
4841   infile.clear();
4842 
4843   gsl_matrix_free(U_hat);
4844   gsl_matrix_free(E_hat);
4845   gsl_matrix_free(OmegaU);
4846   gsl_matrix_free(OmegaE);
4847   gsl_matrix_free(UltVehiY);
4848   gsl_matrix_free(UltVehiBX);
4849   gsl_matrix_free(UltVehiU);
4850   gsl_matrix_free(UltVehiE);
4851 
4852   gsl_matrix_free(Hi_all);
4853   gsl_matrix_free(Hiy_all);
4854   gsl_matrix_free(xHi_all);
4855   gsl_matrix_free(Hessian);
4856 
4857   gsl_vector_free(x);
4858 
4859   gsl_matrix_free(Y);
4860   gsl_matrix_free(X);
4861   gsl_matrix_free(V_g);
4862   gsl_matrix_free(V_e);
4863   gsl_matrix_free(B);
4864   gsl_vector_free(beta);
4865   gsl_matrix_free(Vbeta);
4866 
4867   gsl_matrix_free(V_g_null);
4868   gsl_matrix_free(V_e_null);
4869   gsl_matrix_free(B_null);
4870   gsl_matrix_free(se_B_null1);
4871   gsl_matrix_free(se_B_null2);
4872 
4873   return;
4874 }
4875