1 /*
2  Genome-wide Efficient Mixed Model Association (GEMMA)
3  Copyright (C) 2011-2017, Xiang Zhou
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <fstream>
20 #include <iostream>
21 #include <sstream>
22 
23 #include <bitset>
24 #include <cmath>
25 #include <cstring>
26 #include <iomanip>
27 #include <iostream>
28 #include <map>
29 #include <set>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string>
33 #include <vector>
34 
35 #include "gsl/gsl_blas.h"
36 #include "gsl/gsl_linalg.h"
37 #include "gsl/gsl_matrix.h"
38 #include "gsl/gsl_vector.h"
39 
40 #include "gsl/gsl_cdf.h"
41 #include "gsl/gsl_min.h"
42 #include "gsl/gsl_multiroots.h"
43 
44 // #include "Eigen/Dense"
45 
46 #include "gzstream.h"
47 #include "gemma_io.h"
48 #include "lapack.h"
49 #include "lmm.h"
50 #include "mathfunc.h"
51 #include "param.h"
52 #include "vc.h"
53 #include "fastblas.h"
54 
55 using namespace std;
56 // using namespace Eigen;
57 
58 // In this file, X, Y are already transformed (i.e. UtX and UtY).
CopyFromParam(PARAM & cPar)59 void VC::CopyFromParam(PARAM &cPar) {
60   a_mode = cPar.a_mode;
61 
62   file_cat = cPar.file_cat;
63   file_beta = cPar.file_beta;
64   file_cor = cPar.file_cor;
65 
66   setSnps = cPar.setSnps;
67 
68   file_out = cPar.file_out;
69   path_out = cPar.path_out;
70 
71   time_UtX = 0.0;
72   time_opt = 0.0;
73 
74   v_traceG = cPar.v_traceG;
75 
76   ni_total = cPar.ni_total;
77   ns_total = cPar.ns_total;
78   ns_test = cPar.ns_test;
79 
80   crt = cPar.crt;
81   window_cm = cPar.window_cm;
82   window_bp = cPar.window_bp;
83   window_ns = cPar.window_ns;
84 
85   n_vc = cPar.n_vc;
86 
87   return;
88 }
89 
CopyToParam(PARAM & cPar)90 void VC::CopyToParam(PARAM &cPar) {
91   cPar.time_UtX = time_UtX;
92   cPar.time_opt = time_opt;
93 
94   cPar.v_pve = v_pve;
95   cPar.v_se_pve = v_se_pve;
96   cPar.v_sigma2 = v_sigma2;
97   cPar.v_se_sigma2 = v_se_sigma2;
98   cPar.pve_total = pve_total;
99   cPar.se_pve_total = se_pve_total;
100   cPar.v_traceG = v_traceG;
101 
102   cPar.v_beta = v_beta;
103   cPar.v_se_beta = v_se_beta;
104 
105   cPar.ni_total = ni_total;
106   cPar.ns_total = ns_total;
107   cPar.ns_test = ns_test;
108 
109   cPar.n_vc = n_vc;
110 
111   return;
112 }
113 
WriteFile_qs(const gsl_vector * s_vec,const gsl_vector * q_vec,const gsl_vector * qvar_vec,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat)114 void VC::WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
115                       const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
116                       const gsl_matrix *Svar_mat) {
117   string file_str;
118   file_str = path_out + "/" + file_out;
119   file_str += ".qvec.txt";
120 
121   ofstream outfile_q(file_str.c_str(), ofstream::out);
122   if (!outfile_q) {
123     cout << "error writing file: " << file_str.c_str() << endl;
124     return;
125   }
126 
127   for (size_t i = 0; i < s_vec->size; i++) {
128     outfile_q << gsl_vector_get(s_vec, i) << endl;
129   }
130   for (size_t i = 0; i < q_vec->size; i++) {
131     outfile_q << gsl_vector_get(q_vec, i) << endl;
132   }
133   for (size_t i = 0; i < qvar_vec->size; i++) {
134     outfile_q << gsl_vector_get(qvar_vec, i) << endl;
135   }
136 
137   outfile_q.clear();
138   outfile_q.close();
139 
140   file_str = path_out + "/" + file_out;
141   file_str += ".smat.txt";
142 
143   ofstream outfile_s(file_str.c_str(), ofstream::out);
144   if (!outfile_s) {
145     cout << "error writing file: " << file_str.c_str() << endl;
146     return;
147   }
148 
149   for (size_t i = 0; i < S_mat->size1; i++) {
150     for (size_t j = 0; j < S_mat->size2; j++) {
151       outfile_s << gsl_matrix_get(S_mat, i, j) << "\t";
152     }
153     outfile_s << endl;
154   }
155   for (size_t i = 0; i < Svar_mat->size1; i++) {
156     for (size_t j = 0; j < Svar_mat->size2; j++) {
157       outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t";
158     }
159     outfile_s << endl;
160   }
161 
162   outfile_s.clear();
163   outfile_s.close();
164 
165   return;
166 }
167 
UpdateParam(const gsl_vector * log_sigma2,VC_PARAM * p)168 void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) {
169   size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2;
170 
171   gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1);
172   gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt);
173   gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt);
174   gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt);
175   gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1);
176 
177   double sigma2;
178 
179   // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
180   gsl_matrix_set_zero(p->P);
181   for (size_t i = 0; i < n_vc + 1; i++) {
182     if (i == n_vc) {
183       gsl_matrix_set_identity(K_temp);
184     } else {
185       gsl_matrix_const_view K_sub =
186           gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
187       gsl_matrix_memcpy(K_temp, &K_sub.matrix);
188     }
189 
190     // When unconstrained, update on sigma2 instead of log_sigma2.
191     if (p->noconstrain) {
192       sigma2 = gsl_vector_get(log_sigma2, i);
193     } else {
194       sigma2 = exp(gsl_vector_get(log_sigma2, i));
195     }
196     gsl_matrix_scale(K_temp, sigma2);
197     gsl_matrix_add(p->P, K_temp);
198   }
199 
200   // Calculate H^{-1}.
201   fast_inverse(p->P);
202 
203   fast_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW);
204   fast_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
205 
206   fast_inverse(WtHiW);
207   gsl_matrix_memcpy(WtHiWi, WtHiW);
208 
209   fast_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
210   fast_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
211 
212   // Calculate Py, KPy, PKPy.
213   gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
214 
215   double d;
216   for (size_t i = 0; i < n_vc + 1; i++) {
217     gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i);
218     gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i);
219 
220     if (i == n_vc) {
221       gsl_vector_memcpy(&KPy.vector, p->Py);
222     } else {
223       gsl_matrix_const_view K_sub =
224           gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
225 
226       // Seems to be important to use gsl dgemv here instead of
227       // fast_dgemv; otherwise.
228       gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
229     }
230 
231     gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
232 
233     // When phenotypes are not normalized well, then some values in
234     // the following matrix maybe NaN; change that to 0; this seems to
235     // only happen when fast_dgemv was used above.
236     for (size_t j = 0; j < p->KPy_mat->size1; j++) {
237       d = gsl_matrix_get(p->KPy_mat, j, i);
238       if (isnan(d)) {
239         gsl_matrix_set(p->KPy_mat, j, i, 0);
240         cout << "nan appears in " << i << " " << j << endl;
241       }
242       d = gsl_matrix_get(p->PKPy_mat, j, i);
243       if (isnan(d)) {
244         gsl_matrix_set(p->PKPy_mat, j, i, 0);
245         cout << "nan appears in " << i << " " << j << endl;
246       }
247     }
248   }
249 
250   gsl_matrix_free(K_temp);
251   gsl_matrix_free(HiW);
252   gsl_matrix_free(WtHiW);
253   gsl_matrix_free(WtHiWi);
254   gsl_matrix_free(WtHiWiWtHi);
255 
256   return;
257 }
258 
259 // Below are functions for AI algorithm.
LogRL_dev1(const gsl_vector * log_sigma2,void * params,gsl_vector * dev1)260 int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
261   VC_PARAM *p = (VC_PARAM *)params;
262 
263   size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
264 
265   double tr, d;
266 
267   // Update parameters.
268   UpdateParam(log_sigma2, p);
269 
270   // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy.
271   for (size_t i = 0; i < n_vc + 1; i++) {
272     if (i == n_vc) {
273       tr = 0;
274       for (size_t l = 0; l < n1; l++) {
275         tr += gsl_matrix_get(p->P, l, l);
276       }
277     } else {
278       tr = 0;
279       for (size_t l = 0; l < n1; l++) {
280         gsl_vector_view P_row = gsl_matrix_row(p->P, l);
281         gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
282         gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
283         tr += d;
284       }
285     }
286 
287     gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
288     gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
289 
290     if (p->noconstrain) {
291       d = (-0.5 * tr + 0.5 * d);
292     } else {
293       d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i));
294     }
295 
296     gsl_vector_set(dev1, i, d);
297   }
298 
299   return GSL_SUCCESS;
300 }
301 
LogRL_dev2(const gsl_vector * log_sigma2,void * params,gsl_matrix * dev2)302 int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
303   VC_PARAM *p = (VC_PARAM *)params;
304 
305   size_t n_vc = log_sigma2->size - 1;
306 
307   double d, sigma2_i, sigma2_j;
308 
309   // Update parameters.
310   UpdateParam(log_sigma2, p);
311 
312   // Calculate dev2 = 0.5(yPKPKPy).
313   for (size_t i = 0; i < n_vc + 1; i++) {
314     gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
315     if (p->noconstrain) {
316       sigma2_i = gsl_vector_get(log_sigma2, i);
317     } else {
318       sigma2_i = exp(gsl_vector_get(log_sigma2, i));
319     }
320 
321     for (size_t j = i; j < n_vc + 1; j++) {
322       gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
323 
324       gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
325       if (p->noconstrain) {
326         sigma2_j = gsl_vector_get(log_sigma2, j);
327         d *= -0.5;
328       } else {
329         sigma2_j = exp(gsl_vector_get(log_sigma2, j));
330         d *= -0.5 * sigma2_i * sigma2_j;
331       }
332 
333       gsl_matrix_set(dev2, i, j, d);
334       if (j != i) {
335         gsl_matrix_set(dev2, j, i, d);
336       }
337     }
338   }
339 
340   gsl_matrix_memcpy(p->Hessian, dev2);
341   return GSL_SUCCESS;
342 }
343 
LogRL_dev12(const gsl_vector * log_sigma2,void * params,gsl_vector * dev1,gsl_matrix * dev2)344 int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1,
345                 gsl_matrix *dev2) {
346   VC_PARAM *p = (VC_PARAM *)params;
347 
348   size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
349 
350   double tr, d, sigma2_i, sigma2_j;
351 
352   // Update parameters.
353   UpdateParam(log_sigma2, p);
354 
355   for (size_t i = 0; i < n_vc + 1; i++) {
356     if (i == n_vc) {
357       tr = 0;
358       for (size_t l = 0; l < n1; l++) {
359         tr += gsl_matrix_get(p->P, l, l);
360       }
361     } else {
362       tr = 0;
363       for (size_t l = 0; l < n1; l++) {
364         gsl_vector_view P_row = gsl_matrix_row(p->P, l);
365         gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
366         gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
367         tr += d;
368       }
369     }
370 
371     gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
372     gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
373 
374     if (p->noconstrain) {
375       sigma2_i = gsl_vector_get(log_sigma2, i);
376       d = (-0.5 * tr + 0.5 * d);
377     } else {
378       sigma2_i = exp(gsl_vector_get(log_sigma2, i));
379       d = (-0.5 * tr + 0.5 * d) * sigma2_i;
380     }
381 
382     gsl_vector_set(dev1, i, d);
383 
384     for (size_t j = i; j < n_vc + 1; j++) {
385       gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
386       gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
387 
388       if (p->noconstrain) {
389         sigma2_j = gsl_vector_get(log_sigma2, j);
390         d *= -0.5;
391       } else {
392         sigma2_j = exp(gsl_vector_get(log_sigma2, j));
393         d *= -0.5 * sigma2_i * sigma2_j;
394       }
395 
396       gsl_matrix_set(dev2, i, j, d);
397       if (j != i) {
398         gsl_matrix_set(dev2, j, i, d);
399       }
400     }
401   }
402 
403   gsl_matrix_memcpy(p->Hessian, dev2);
404 
405   return GSL_SUCCESS;
406 }
407 
408 // Read header to determine which column contains which item.
ReadHeader_vc(const string & line,HEADER & header)409 bool ReadHeader_vc(const string &line, HEADER &header) {
410   debug_msg("entering");
411   string rs_ptr[] = {"rs",   "RS",    "snp",   "SNP",  "snps",
412                      "SNPS", "snpid", "SNPID", "rsid", "RSID"};
413   set<string> rs_set(rs_ptr, rs_ptr + 10);
414   string chr_ptr[] = {"chr", "CHR"};
415   set<string> chr_set(chr_ptr, chr_ptr + 2);
416   string pos_ptr[] = {
417       "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
418   set<string> pos_set(pos_ptr, pos_ptr + 8);
419   string cm_ptr[] = {"cm", "CM"};
420   set<string> cm_set(cm_ptr, cm_ptr + 2);
421   string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"};
422   set<string> a1_set(a1_ptr, a1_ptr + 4);
423   string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"};
424   set<string> a0_set(a0_ptr, a0_ptr + 4);
425 
426   string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
427   set<string> z_set(z_ptr, z_ptr + 6);
428   string beta_ptr[] = {"beta", "BETA", "b", "B"};
429   set<string> beta_set(beta_ptr, beta_ptr + 4);
430   string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
431   set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
432   string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
433   set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
434   string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
435   set<string> p_set(p_ptr, p_ptr + 6);
436 
437   string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
438   set<string> n_set(n_ptr, n_ptr + 6);
439   string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
440   set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
441   string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
442   set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
443 
444   string af_ptr[] = {"af",
445                      "AF",
446                      "maf",
447                      "MAF",
448                      "f",
449                      "F",
450                      "allele_freq",
451                      "ALLELE_FREQ",
452                      "allele_frequency",
453                      "ALLELE_FREQUENCY"};
454   set<string> af_set(af_ptr, af_ptr + 10);
455   string var_ptr[] = {"var", "VAR"};
456   set<string> var_set(var_ptr, var_ptr + 2);
457 
458   string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
459   set<string> ws_set(ws_ptr, ws_ptr + 4);
460   string cor_ptr[] = {"cor", "COR", "r", "R"};
461   set<string> cor_set(cor_ptr, cor_ptr + 4);
462 
463   header.rs_col = 0;
464   header.chr_col = 0;
465   header.pos_col = 0;
466   header.a1_col = 0;
467   header.a0_col = 0;
468   header.z_col = 0;
469   header.beta_col = 0;
470   header.sebeta_col = 0;
471   header.chisq_col = 0;
472   header.p_col = 0;
473   header.n_col = 0;
474   header.nmis_col = 0;
475   header.nobs_col = 0;
476   header.af_col = 0;
477   header.var_col = 0;
478   header.ws_col = 0;
479   header.cor_col = 0;
480   header.coln = 0;
481 
482   char *ch_ptr;
483   string type;
484   size_t n_error = 0;
485 
486   ch_ptr = strtok((char *)line.c_str(), " , \t");
487   while (ch_ptr != NULL) {
488     type = ch_ptr;
489     if (rs_set.count(type) != 0) {
490       if (header.rs_col == 0) {
491         header.rs_col = header.coln + 1;
492       } else {
493         cout << "error! more than two rs columns in the file." << endl;
494         n_error++;
495       }
496     } else if (chr_set.count(type) != 0) {
497       if (header.chr_col == 0) {
498         header.chr_col = header.coln + 1;
499       } else {
500         cout << "error! more than two chr columns in the file." << endl;
501         n_error++;
502       }
503     } else if (pos_set.count(type) != 0) {
504       if (header.pos_col == 0) {
505         header.pos_col = header.coln + 1;
506       } else {
507         cout << "error! more than two pos columns in the file." << endl;
508         n_error++;
509       }
510     } else if (cm_set.count(type) != 0) {
511       if (header.cm_col == 0) {
512         header.cm_col = header.coln + 1;
513       } else {
514         cout << "error! more than two cm columns in the file." << endl;
515         n_error++;
516       }
517     } else if (a1_set.count(type) != 0) {
518       if (header.a1_col == 0) {
519         header.a1_col = header.coln + 1;
520       } else {
521         cout << "error! more than two allele1 columns in the file." << endl;
522         n_error++;
523       }
524     } else if (a0_set.count(type) != 0) {
525       if (header.a0_col == 0) {
526         header.a0_col = header.coln + 1;
527       } else {
528         cout << "error! more than two allele0 columns in the file." << endl;
529         n_error++;
530       }
531     } else if (z_set.count(type) != 0) {
532       if (header.z_col == 0) {
533         header.z_col = header.coln + 1;
534       } else {
535         cout << "error! more than two z columns in the file." << endl;
536         n_error++;
537       }
538     } else if (beta_set.count(type) != 0) {
539       if (header.beta_col == 0) {
540         header.beta_col = header.coln + 1;
541       } else {
542         cout << "error! more than two beta columns in the file." << endl;
543         n_error++;
544       }
545     } else if (sebeta_set.count(type) != 0) {
546       if (header.sebeta_col == 0) {
547         header.sebeta_col = header.coln + 1;
548       } else {
549         cout << "error! more than two se_beta columns in the file." << endl;
550         n_error++;
551       }
552     } else if (chisq_set.count(type) != 0) {
553       if (header.chisq_col == 0) {
554         header.chisq_col = header.coln + 1;
555       } else {
556         cout << "error! more than two z columns in the file." << endl;
557         n_error++;
558       }
559     } else if (p_set.count(type) != 0) {
560       if (header.p_col == 0) {
561         header.p_col = header.coln + 1;
562       } else {
563         cout << "error! more than two p columns in the file." << endl;
564         n_error++;
565       }
566     } else if (n_set.count(type) != 0) {
567       if (header.n_col == 0) {
568         header.n_col = header.coln + 1;
569       } else {
570         cout << "error! more than two n_total columns in the file." << endl;
571         n_error++;
572       }
573     } else if (nmis_set.count(type) != 0) {
574       if (header.nmis_col == 0) {
575         header.nmis_col = header.coln + 1;
576       } else {
577         cout << "error! more than two n_mis columns in the file." << endl;
578         n_error++;
579       }
580     } else if (nobs_set.count(type) != 0) {
581       if (header.nobs_col == 0) {
582         header.nobs_col = header.coln + 1;
583       } else {
584         cout << "error! more than two n_obs columns in the file." << endl;
585         n_error++;
586       }
587     } else if (ws_set.count(type) != 0) {
588       if (header.ws_col == 0) {
589         header.ws_col = header.coln + 1;
590       } else {
591         cout << "error! more than two window_size columns in the file." << endl;
592         n_error++;
593       }
594     } else if (af_set.count(type) != 0) {
595       if (header.af_col == 0) {
596         header.af_col = header.coln + 1;
597       } else {
598         cout << "error! more than two af columns in the file." << endl;
599         n_error++;
600       }
601     } else if (cor_set.count(type) != 0) {
602       if (header.cor_col == 0) {
603         header.cor_col = header.coln + 1;
604       } else {
605         cout << "error! more than two cor columns in the file." << endl;
606         n_error++;
607       }
608     } else {
609     }
610 
611     ch_ptr = strtok(NULL, " , \t");
612     header.coln++;
613   }
614 
615   if (header.cor_col != 0 && header.cor_col != header.coln) {
616     cout << "error! the cor column should be the last column." << endl;
617     n_error++;
618   }
619 
620   if (header.rs_col == 0) {
621     if (header.chr_col != 0 && header.pos_col != 0) {
622       cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
623     } else {
624       cout << "error! missing an rs column." << endl;
625       n_error++;
626     }
627   }
628 
629   if (n_error == 0) {
630     return true;
631   } else {
632     return false;
633   }
634 }
635 
636 // Read cov file the first time, record mapRS2in, mapRS2var (in case
637 // var is not provided in the z file), store vec_n and vec_rs.
ReadFile_cor(const string & file_cor,const set<string> & setSnps,vector<string> & vec_rs,vector<size_t> & vec_n,vector<double> & vec_cm,vector<double> & vec_bp,map<string,size_t> & mapRS2in,map<string,double> & mapRS2var)638 void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
639                   vector<string> &vec_rs, vector<size_t> &vec_n,
640                   vector<double> &vec_cm, vector<double> &vec_bp,
641                   map<string, size_t> &mapRS2in,
642                   map<string, double> &mapRS2var) {
643   debug_msg("entering");
644   vec_rs.clear();
645   vec_n.clear();
646   mapRS2in.clear();
647   mapRS2var.clear();
648 
649   igzstream infile(file_cor.c_str(), igzstream::in);
650   if (!infile) {
651     cout << "error! fail to open cov file: " << file_cor << endl;
652     return;
653   }
654 
655   string line;
656   char *ch_ptr;
657 
658   string rs, chr, a1, a0, pos, cm;
659   double af = 0, var_x = 0, d_pos, d_cm;
660   size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0;
661   size_t ns_test = 0, ns_total = 0;
662 
663   HEADER header;
664 
665   // Header.
666   safeGetline(infile, line).eof();
667   ReadHeader_vc(line, header);
668 
669   if (header.n_col == 0) {
670     if (header.nobs_col == 0 && header.nmis_col == 0) {
671       cout << "error! missing sample size in the cor file." << endl;
672     } else {
673       cout << "total sample size will be replaced by obs/mis sample size."
674            << endl;
675     }
676   }
677 
678   while (!safeGetline(infile, line).eof()) {
679 
680     // do not read cor values this time; upto col_n-1.
681     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
682 
683     n_total = 0;
684     n_mis = 0;
685     n_obs = 0;
686     af = 0;
687     var_x = 0;
688     d_cm = 0;
689     d_pos = 0;
690     for (size_t i = 0; i < header.coln - 1; i++) {
691       enforce(ch_ptr);
692       if (header.rs_col != 0 && header.rs_col == i + 1) {
693         rs = ch_ptr;
694       }
695       if (header.chr_col != 0 && header.chr_col == i + 1) {
696         chr = ch_ptr;
697       }
698       if (header.pos_col != 0 && header.pos_col == i + 1) {
699         pos = ch_ptr;
700         d_pos = atof(ch_ptr);
701       }
702       if (header.cm_col != 0 && header.cm_col == i + 1) {
703         cm = ch_ptr;
704         d_cm = atof(ch_ptr);
705       }
706       if (header.a1_col != 0 && header.a1_col == i + 1) {
707         a1 = ch_ptr;
708       }
709       if (header.a0_col != 0 && header.a0_col == i + 1) {
710         a0 = ch_ptr;
711       }
712 
713       if (header.n_col != 0 && header.n_col == i + 1) {
714         n_total = atoi(ch_ptr);
715       }
716       if (header.nmis_col != 0 && header.nmis_col == i + 1) {
717         n_mis = atoi(ch_ptr);
718       }
719       if (header.nobs_col != 0 && header.nobs_col == i + 1) {
720         n_obs = atoi(ch_ptr);
721       }
722 
723       if (header.af_col != 0 && header.af_col == i + 1) {
724         af = atof(ch_ptr);
725       }
726       if (header.var_col != 0 && header.var_col == i + 1) {
727         var_x = atof(ch_ptr);
728       }
729 
730       ch_ptr = strtok(NULL, " , \t");
731     }
732 
733     if (header.rs_col == 0) {
734       rs = chr + ":" + pos;
735     }
736 
737     if (header.n_col == 0) {
738       n_total = n_mis + n_obs;
739     }
740 
741     // Record rs, n.
742     vec_rs.push_back(rs);
743     vec_n.push_back(n_total);
744     if (d_cm > 0) {
745       vec_cm.push_back(d_cm);
746     } else {
747       vec_cm.push_back(d_cm);
748     }
749     if (d_pos > 0) {
750       vec_bp.push_back(d_pos);
751     } else {
752       vec_bp.push_back(d_pos);
753     }
754 
755     // Record mapRS2in and mapRS2var.
756     if (setSnps.size() == 0 || setSnps.count(rs) != 0) {
757       if (mapRS2in.count(rs) == 0) {
758         mapRS2in[rs] = 1;
759 
760         if (header.var_col != 0) {
761           mapRS2var[rs] = var_x;
762         } else if (header.af_col != 0) {
763           var_x = 2.0 * af * (1.0 - af);
764           mapRS2var[rs] = var_x;
765         } else {
766         }
767 
768         ns_test++;
769 
770       } else {
771         cout << "error! more than one snp has the same id " << rs
772              << " in cor file?" << endl;
773       }
774     }
775 
776     // Record max pos.
777     ni_total = max(ni_total, n_total);
778     ns_total++;
779   }
780 
781   infile.close();
782   infile.clear();
783 
784   return;
785 }
786 
787 // Read beta file, store mapRS2var if var is provided here, calculate
788 // q and var_y.
ReadFile_beta(const bool flag_priorscale,const string & file_beta,const map<string,size_t> & mapRS2cat,map<string,size_t> & mapRS2in,map<string,double> & mapRS2var,map<string,size_t> & mapRS2nsamp,gsl_vector * q_vec,gsl_vector * qvar_vec,gsl_vector * s_vec,size_t & ni_total,size_t & ns_total)789 void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
790                    const map<string, size_t> &mapRS2cat,
791                    map<string, size_t> &mapRS2in,
792                    map<string, double> &mapRS2var,
793                    map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec,
794                    gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total,
795                    size_t &ns_total) {
796   debug_msg("entering");
797   mapRS2nsamp.clear();
798 
799   igzstream infile(file_beta.c_str(), igzstream::in);
800   if (!infile) {
801     cout << "error! fail to open beta file: " << file_beta << endl;
802     return;
803   }
804 
805   string line;
806   char *ch_ptr;
807   string type;
808 
809   string rs, chr, a1, a0, pos, cm;
810   double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
811          af = 0, var_x = 0;
812   size_t n_total = 0, n_mis = 0, n_obs = 0;
813   size_t ns_test = 0;
814   ns_total = 0;
815   ni_total = 0;
816 
817   vector<double> vec_q, vec_qvar, vec_s;
818   for (size_t i = 0; i < q_vec->size; i++) {
819     vec_q.push_back(0.0);
820     vec_qvar.push_back(0.0);
821     vec_s.push_back(0.0);
822   }
823 
824   // Read header.
825   HEADER header;
826   safeGetline(infile, line).eof();
827   ReadHeader_vc(line, header);
828 
829   if (header.n_col == 0) {
830     if (header.nobs_col == 0 && header.nmis_col == 0) {
831       cout << "error! missing sample size in the beta file." << endl;
832     } else {
833       cout << "total sample size will be replaced by obs/mis sample size."
834            << endl;
835     }
836   }
837 
838   if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
839       header.chisq_col == 0 && header.p_col == 0) {
840     cout << "error! missing z scores in the beta file." << endl;
841   }
842 
843   if (header.af_col == 0 && header.var_col == 0 && mapRS2var.size() == 0) {
844     cout << "error! missing allele frequency in the beta file." << endl;
845   }
846 
847   while (!safeGetline(infile, line).eof()) {
848     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
849 
850     z = 0;
851     beta = 0;
852     se_beta = 0;
853     chisq = 0;
854     pvalue = 0;
855     n_total = 0;
856     n_mis = 0;
857     n_obs = 0;
858     af = 0;
859     var_x = 0;
860     for (size_t i = 0; i < header.coln; i++) {
861       enforce(ch_ptr);
862       if (header.rs_col != 0 && header.rs_col == i + 1) {
863         rs = ch_ptr;
864       }
865       if (header.chr_col != 0 && header.chr_col == i + 1) {
866         chr = ch_ptr;
867       }
868       if (header.pos_col != 0 && header.pos_col == i + 1) {
869         pos = ch_ptr;
870       }
871       if (header.cm_col != 0 && header.cm_col == i + 1) {
872         cm = ch_ptr;
873       }
874       if (header.a1_col != 0 && header.a1_col == i + 1) {
875         a1 = ch_ptr;
876       }
877       if (header.a0_col != 0 && header.a0_col == i + 1) {
878         a0 = ch_ptr;
879       }
880 
881       if (header.z_col != 0 && header.z_col == i + 1) {
882         z = atof(ch_ptr);
883       }
884       if (header.beta_col != 0 && header.beta_col == i + 1) {
885         beta = atof(ch_ptr);
886       }
887       if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
888         se_beta = atof(ch_ptr);
889       }
890       if (header.chisq_col != 0 && header.chisq_col == i + 1) {
891         chisq = atof(ch_ptr);
892       }
893       if (header.p_col != 0 && header.p_col == i + 1) {
894         pvalue = atof(ch_ptr);
895       }
896 
897       if (header.n_col != 0 && header.n_col == i + 1) {
898         n_total = atoi(ch_ptr);
899       }
900       if (header.nmis_col != 0 && header.nmis_col == i + 1) {
901         n_mis = atoi(ch_ptr);
902       }
903       if (header.nobs_col != 0 && header.nobs_col == i + 1) {
904         n_obs = atoi(ch_ptr);
905       }
906 
907       if (header.af_col != 0 && header.af_col == i + 1) {
908         af = atof(ch_ptr);
909       }
910       if (header.var_col != 0 && header.var_col == i + 1) {
911         var_x = atof(ch_ptr);
912       }
913 
914       ch_ptr = strtok(NULL, " , \t");
915     }
916 
917     if (header.rs_col == 0) {
918       rs = chr + ":" + pos;
919     }
920 
921     if (header.n_col == 0) {
922       n_total = n_mis + n_obs;
923     }
924 
925     // Both z values and beta/se_beta have directions, while
926     // chisq/pvalue do not.
927     if (header.z_col != 0) {
928       zsquare = z * z;
929     } else if (header.beta_col != 0 && header.sebeta_col != 0) {
930       z = beta / se_beta;
931       zsquare = z * z;
932     } else if (header.chisq_col != 0) {
933       zsquare = chisq;
934     } else if (header.p_col != 0) {
935       zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
936     } else {
937       zsquare = 0;
938     }
939 
940     // If the snp is also present in cor file, then do calculations.
941     if ((header.var_col != 0 || header.af_col != 0 ||
942          mapRS2var.count(rs) != 0) &&
943         mapRS2in.count(rs) != 0 &&
944         (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
945       if (mapRS2in.at(rs) > 1) {
946         cout << "error! more than one snp has the same id " << rs
947              << " in beta file?" << endl;
948         break;
949       }
950 
951       if (header.var_col == 0) {
952         if (header.af_col != 0) {
953           var_x = 2.0 * af * (1.0 - af);
954         } else {
955           var_x = mapRS2var.at(rs);
956         }
957       }
958 
959       if (flag_priorscale) {
960         var_x = 1;
961       }
962 
963       mapRS2in[rs]++;
964       mapRS2var[rs] = var_x;
965       mapRS2nsamp[rs] = n_total;
966 
967       if (mapRS2cat.size() != 0) {
968         vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total;
969         vec_s[mapRS2cat.at(rs)] += var_x;
970         vec_qvar[mapRS2cat.at(rs)] +=
971             var_x * var_x / ((double)n_total * (double)n_total);
972       } else {
973         vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total;
974         vec_s[0] += var_x;
975         vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total);
976       }
977 
978       ni_total = max(ni_total, n_total);
979       ns_test++;
980     }
981 
982     ns_total++;
983   }
984 
985   for (size_t i = 0; i < q_vec->size; i++) {
986     gsl_vector_set(q_vec, i, vec_q[i]);
987     gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]);
988     gsl_vector_set(s_vec, i, vec_s[i]);
989   }
990 
991   infile.clear();
992   infile.close();
993 
994   return;
995 }
996 
997 // Read covariance file the second time.
998 // Look for rs, n_mis+n_obs, var, window_size, cov.
999 // If window_cm/bp/ns is provided, then use these max values to
1000 // calibrate estimates.
ReadFile_cor(const string & file_cor,const vector<string> & vec_rs,const vector<size_t> & vec_n,const vector<double> & vec_cm,const vector<double> & vec_bp,const map<string,size_t> & mapRS2cat,const map<string,size_t> & mapRS2in,const map<string,double> & mapRS2var,const map<string,size_t> & mapRS2nsamp,const size_t crt,const double & window_cm,const double & window_bp,const double & window_ns,gsl_matrix * S_mat,gsl_matrix * Svar_mat,gsl_vector * qvar_vec,size_t & ni_total,size_t & ns_total,size_t & ns_test,size_t & ns_pair)1001 void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
1002                   const vector<size_t> &vec_n, const vector<double> &vec_cm,
1003                   const vector<double> &vec_bp,
1004                   const map<string, size_t> &mapRS2cat,
1005                   const map<string, size_t> &mapRS2in,
1006                   const map<string, double> &mapRS2var,
1007                   const map<string, size_t> &mapRS2nsamp, const size_t crt,
1008                   const double &window_cm, const double &window_bp,
1009                   const double &window_ns, gsl_matrix *S_mat,
1010                   gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total,
1011                   size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
1012   debug_msg("entering");
1013   igzstream infile(file_cor.c_str(), igzstream::in);
1014   if (!infile) {
1015     cout << "error! fail to open cov file: " << file_cor << endl;
1016     return;
1017   }
1018 
1019   string line;
1020   char *ch_ptr;
1021 
1022   string rs1, rs2;
1023   double d1, d2, d3, cor, var1, var2;
1024   size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin;
1025 
1026   vector<vector<double>> mat_S, mat_Svar, mat_tmp;
1027   vector<double> vec_qvar, vec_tmp;
1028   vector<vector<vector<double>>> mat3d_Sbin;
1029 
1030   for (size_t i = 0; i < S_mat->size1; i++) {
1031     vec_qvar.push_back(0.0);
1032   }
1033 
1034   for (size_t i = 0; i < S_mat->size1; i++) {
1035     mat_S.push_back(vec_qvar);
1036     mat_Svar.push_back(vec_qvar);
1037   }
1038 
1039   for (size_t k = 0; k < bin_size; k++) {
1040     vec_tmp.push_back(0.0);
1041   }
1042   for (size_t i = 0; i < S_mat->size1; i++) {
1043     mat_tmp.push_back(vec_tmp);
1044   }
1045   for (size_t i = 0; i < S_mat->size1; i++) {
1046     mat3d_Sbin.push_back(mat_tmp);
1047   }
1048 
1049   string rs, chr, a1, a0, type, pos, cm;
1050   size_t n_total = 0, n_mis = 0, n_obs = 0;
1051   double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm;
1052   ns_test = 0;
1053   ns_total = 0;
1054   ns_pair = 0;
1055   ni_total = 0;
1056 
1057   // Header.
1058   HEADER header;
1059 
1060   safeGetline(infile, line).eof();
1061   ReadHeader_vc(line, header);
1062 
1063   while (!safeGetline(infile, line).eof()) {
1064 
1065     // Do not read cor values this time; upto col_n-1.
1066     d_pos1 = 0;
1067     d_cm1 = 0;
1068     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
1069     for (size_t i = 0; i < header.coln - 1; i++) {
1070       enforce(ch_ptr);
1071       if (header.rs_col != 0 && header.rs_col == i + 1) {
1072         rs = ch_ptr;
1073       }
1074       if (header.chr_col != 0 && header.chr_col == i + 1) {
1075         chr = ch_ptr;
1076       }
1077       if (header.pos_col != 0 && header.pos_col == i + 1) {
1078         pos = ch_ptr;
1079         d_pos1 = atof(ch_ptr);
1080       }
1081       if (header.cm_col != 0 && header.cm_col == i + 1) {
1082         cm = ch_ptr;
1083         d_cm1 = atof(ch_ptr);
1084       }
1085       if (header.a1_col != 0 && header.a1_col == i + 1) {
1086         a1 = ch_ptr;
1087       }
1088       if (header.a0_col != 0 && header.a0_col == i + 1) {
1089         a0 = ch_ptr;
1090       }
1091 
1092       if (header.n_col != 0 && header.n_col == i + 1) {
1093         n_total = atoi(ch_ptr);
1094       }
1095       if (header.nmis_col != 0 && header.nmis_col == i + 1) {
1096         n_mis = atoi(ch_ptr);
1097       }
1098       if (header.nobs_col != 0 && header.nobs_col == i + 1) {
1099         n_obs = atoi(ch_ptr);
1100       }
1101 
1102       ch_ptr = strtok(NULL, " , \t");
1103     }
1104 
1105     if (header.rs_col == 0) {
1106       rs = chr + ":" + pos;
1107     }
1108 
1109     if (header.n_col == 0) {
1110       n_total = n_mis + n_obs;
1111     }
1112 
1113     rs1 = rs;
1114 
1115     if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs1) != 0) &&
1116         mapRS2in.count(rs1) != 0 && mapRS2in.at(rs1) == 2) {
1117       var1 = mapRS2var.at(rs1);
1118       nsamp1 = mapRS2nsamp.at(rs1);
1119       d2 = var1 * var1;
1120 
1121       if (mapRS2cat.size() != 0) {
1122         mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
1123             (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1124         mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
1125             d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
1126         if (crt == 1) {
1127           mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)][0] +=
1128               (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1129         }
1130       } else {
1131         mat_S[0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1132         mat_Svar[0][0] +=
1133             d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
1134         if (crt == 1) {
1135           mat3d_Sbin[0][0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1136         }
1137       }
1138 
1139       n_nb = 0;
1140       while (ch_ptr != NULL) {
1141         type = ch_ptr;
1142         if (type.compare("NA") != 0 && type.compare("na") != 0 &&
1143             type.compare("nan") != 0 && type.compare("-nan") != 0) {
1144           cor = atof(ch_ptr);
1145           rs2 = vec_rs[ns_total + n_nb + 1];
1146           d_pos2 = vec_bp[ns_total + n_nb + 1];
1147           d_cm2 = vec_cm[ns_total + n_nb + 1];
1148           d_pos = abs(d_pos2 - d_pos1);
1149           d_cm = abs(d_cm2 - d_cm1);
1150 
1151           if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs2) != 0) &&
1152               mapRS2in.count(rs2) != 0 && mapRS2in.at(rs2) == 2) {
1153             var2 = mapRS2var.at(rs2);
1154             nsamp2 = mapRS2nsamp.at(rs2);
1155             d1 = cor * cor -
1156                  1.0 / (double)min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
1157             d2 = var1 * var2;
1158             d3 = cor * cor / ((double)nsamp1 * (double)nsamp2);
1159             n12 = min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
1160 
1161             // Compute bin.
1162             if (crt == 1) {
1163               if (window_cm != 0 && d_cm1 != 0 && d_cm2 != 0) {
1164                 bin =
1165                     min((int)floor(d_cm / window_cm * bin_size), (int)bin_size);
1166               } else if (window_bp != 0 && d_pos1 != 0 && d_pos2 != 0) {
1167                 bin = min((int)floor(d_pos / window_bp * bin_size),
1168                           (int)bin_size);
1169               } else if (window_ns != 0) {
1170                 bin = min((int)floor(((double)n_nb + 1) / window_ns * bin_size),
1171                           (int)bin_size);
1172               }
1173             }
1174 
1175             if (mapRS2cat.size() != 0) {
1176               if (mapRS2cat.at(rs1) == mapRS2cat.at(rs2)) {
1177                 vec_qvar[mapRS2cat.at(rs1)] += 2 * d3 * d2;
1178                 mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += 2 * d1 * d2;
1179                 mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
1180                     2 * d2 * d2 / ((double)n12 * (double)n12);
1181                 if (crt == 1) {
1182                   mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
1183                       2 * d1 * d2;
1184                 }
1185               } else {
1186                 mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += d1 * d2;
1187                 mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
1188                     d2 * d2 / ((double)n12 * (double)n12);
1189                 if (crt == 1) {
1190                   mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
1191                       d1 * d2;
1192                 }
1193               }
1194             } else {
1195               vec_qvar[0] += 2 * d3 * d2;
1196               mat_S[0][0] += 2 * d1 * d2;
1197               mat_Svar[0][0] += 2 * d2 * d2 / ((double)n12 * (double)n12);
1198 
1199               if (crt == 1) {
1200                 mat3d_Sbin[0][0][bin] += 2 * d1 * d2;
1201               }
1202             }
1203             ns_pair++;
1204           }
1205         }
1206 
1207         ch_ptr = strtok(NULL, " , \t");
1208         n_nb++;
1209       }
1210       ni_total = max(ni_total, n_total);
1211       ns_test++;
1212     }
1213 
1214     ns_total++;
1215   }
1216 
1217   // Use S_bin to fit a rational function y=1/(a+bx)^2, where
1218   // x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation
1219   // factor as a percentage.
1220   double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor;
1221   if (crt == 1) {
1222     for (size_t i = 0; i < S_mat->size1; i++) {
1223       for (size_t j = i; j < S_mat->size2; j++) {
1224 
1225         // Correct mat_S.
1226         n = 0;
1227         var_y = 0;
1228         var_x = 0;
1229         mean_y = 0;
1230         mean_x = 0;
1231         cov_xy = 0;
1232         for (size_t k = 0; k < bin_size; k++) {
1233           if (j == i) {
1234             y = mat3d_Sbin[i][j][k];
1235           } else {
1236             y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k];
1237           }
1238           x = k + 0.5;
1239           cout << y << ", ";
1240           if (y > 0) {
1241             y = 1 / sqrt(y);
1242             mean_x += x;
1243             mean_y += y;
1244             var_x += x * x;
1245             var_y += y * y;
1246             cov_xy += x * y;
1247             n++;
1248           }
1249         }
1250         cout << endl;
1251 
1252         if (n >= 5) {
1253           mean_x /= n;
1254           mean_y /= n;
1255           var_x /= n;
1256           var_y /= n;
1257           cov_xy /= n;
1258           var_x -= mean_x * mean_x;
1259           var_y -= mean_y * mean_y;
1260           cov_xy -= mean_x * mean_y;
1261           b = cov_xy / var_x;
1262           a = mean_y - b * mean_x;
1263           crt_factor = a / (b * (bin_size + 0.5)) + 1;
1264           if (i == j) {
1265             mat_S[i][j] *= crt_factor;
1266           } else {
1267             mat_S[i][j] *= crt_factor;
1268             mat_S[j][i] *= crt_factor;
1269           }
1270           cout << crt_factor << endl;
1271 
1272           // Correct qvar.
1273           if (i == j) {
1274             vec_qvar[i] *= crt_factor;
1275           }
1276         }
1277       }
1278     }
1279   }
1280 
1281   // Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat.
1282   for (size_t i = 0; i < S_mat->size1; i++) {
1283     d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i];
1284     gsl_vector_set(qvar_vec, i, d1);
1285     for (size_t j = 0; j < S_mat->size2; j++) {
1286       if (i == j) {
1287         gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
1288         gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test *
1289                                            ns_test / (2.0 * ns_pair));
1290       } else {
1291         gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]);
1292         gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) *
1293                                            ns_test * ns_test / (2.0 * ns_pair));
1294       }
1295     }
1296   }
1297 
1298   infile.clear();
1299   infile.close();
1300 
1301   return;
1302 }
1303 
1304 // Use the new method to calculate variance components with summary
1305 // statistics first, use a function CalcS to compute S matrix (where
1306 // the diagonal elements are part of V(q) ), and then use bootstrap to
1307 // compute the variance for S, use a set of genotypes, phenotypes, and
1308 // individual ids, and snp category label.
CalcVCss(const gsl_matrix * Vq,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat,const gsl_vector * q_vec,const gsl_vector * s_vec,const double df,vector<double> & v_pve,vector<double> & v_se_pve,double & pve_total,double & se_pve_total,vector<double> & v_sigma2,vector<double> & v_se_sigma2,vector<double> & v_enrich,vector<double> & v_se_enrich)1309 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
1310               const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
1311               const gsl_vector *s_vec, const double df, vector<double> &v_pve,
1312               vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
1313               vector<double> &v_sigma2, vector<double> &v_se_sigma2,
1314               vector<double> &v_enrich, vector<double> &v_se_enrich) {
1315   size_t n_vc = S_mat->size1;
1316 
1317   gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1318   gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1319   gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
1320   gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
1321   gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
1322   gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
1323 
1324   gsl_vector *pve = gsl_vector_alloc(n_vc);
1325   gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1);
1326   gsl_vector *tmp = gsl_vector_alloc(n_vc + 1);
1327   gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc);
1328   gsl_vector *enrich = gsl_vector_alloc(n_vc);
1329   gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1330   gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc);
1331   gsl_vector *se_enrich = gsl_vector_alloc(n_vc);
1332 
1333   double d;
1334 
1335   // Calculate S^{-1}q.
1336   gsl_matrix_memcpy(tmp_mat, S_mat);
1337   int sig;
1338   gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1339   LUDecomp(tmp_mat, pmt, &sig);
1340   LUInvert(tmp_mat, pmt, Si_mat);
1341 
1342   // Calculate sigma2snp and pve.
1343   gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
1344   gsl_vector_memcpy(sigma2persnp, pve);
1345   gsl_vector_div(sigma2persnp, s_vec);
1346 
1347   // Get qvar_mat.
1348   gsl_matrix_memcpy(qvar_mat, Vq);
1349   gsl_matrix_scale(qvar_mat, 1.0 / (df * df));
1350 
1351   // Calculate variance for these estimates.
1352   for (size_t i = 0; i < n_vc; i++) {
1353     for (size_t j = i; j < n_vc; j++) {
1354       d = gsl_matrix_get(Svar_mat, i, j);
1355       d *= gsl_vector_get(pve, i) * gsl_vector_get(pve, j);
1356 
1357       d += gsl_matrix_get(qvar_mat, i, j);
1358       gsl_matrix_set(Var_mat, i, j, d);
1359       if (i != j) {
1360         gsl_matrix_set(Var_mat, j, i, d);
1361       }
1362     }
1363   }
1364 
1365   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
1366                  tmp_mat);
1367   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
1368                  Var_mat);
1369 
1370   for (size_t i = 0; i < n_vc; i++) {
1371     d = sqrt(gsl_matrix_get(Var_mat, i, i));
1372     gsl_vector_set(se_pve, i, d);
1373     d /= gsl_vector_get(s_vec, i);
1374     gsl_vector_set(se_sigma2persnp, i, d);
1375   }
1376 
1377   // Compute pve_total, se_pve_total.
1378   pve_total = 0;
1379   se_pve_total = 0;
1380   for (size_t i = 0; i < n_vc; i++) {
1381     pve_total += gsl_vector_get(pve, i);
1382 
1383     for (size_t j = 0; j < n_vc; j++) {
1384       se_pve_total += gsl_matrix_get(Var_mat, i, j);
1385     }
1386   }
1387   se_pve_total = sqrt(se_pve_total);
1388 
1389   // Compute enrichment and its variance.
1390   double s_pve = 0, s_snp = 0;
1391   for (size_t i = 0; i < n_vc; i++) {
1392     s_pve += gsl_vector_get(pve, i);
1393     s_snp += gsl_vector_get(s_vec, i);
1394   }
1395   gsl_vector_memcpy(enrich, sigma2persnp);
1396   gsl_vector_scale(enrich, s_snp / s_pve);
1397 
1398   gsl_matrix_set_identity(tmp_mat);
1399 
1400   double d1;
1401   for (size_t i = 0; i < n_vc; i++) {
1402     d = gsl_vector_get(pve, i) / s_pve;
1403     d1 = gsl_vector_get(s_vec, i);
1404     for (size_t j = 0; j < n_vc; j++) {
1405       if (i == j) {
1406         gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
1407       } else {
1408         gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
1409       }
1410     }
1411   }
1412   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
1413                  tmp_mat1);
1414   gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
1415                  VarEnrich_mat);
1416 
1417   for (size_t i = 0; i < n_vc; i++) {
1418     d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
1419     gsl_vector_set(se_enrich, i, d);
1420   }
1421 
1422   cout << "pve = ";
1423   for (size_t i = 0; i < n_vc; i++) {
1424     cout << gsl_vector_get(pve, i) << " ";
1425   }
1426   cout << endl;
1427 
1428   cout << "se(pve) = ";
1429   for (size_t i = 0; i < n_vc; i++) {
1430     cout << gsl_vector_get(se_pve, i) << " ";
1431   }
1432   cout << endl;
1433 
1434   cout << "sigma2 per snp = ";
1435   for (size_t i = 0; i < n_vc; i++) {
1436     cout << gsl_vector_get(sigma2persnp, i) << " ";
1437   }
1438   cout << endl;
1439 
1440   cout << "se(sigma2 per snp) = ";
1441   for (size_t i = 0; i < n_vc; i++) {
1442     cout << gsl_vector_get(se_sigma2persnp, i) << " ";
1443   }
1444   cout << endl;
1445 
1446   cout << "enrichment = ";
1447   for (size_t i = 0; i < n_vc; i++) {
1448     cout << gsl_vector_get(enrich, i) << " ";
1449   }
1450   cout << endl;
1451 
1452   cout << "se(enrichment) = ";
1453   for (size_t i = 0; i < n_vc; i++) {
1454     cout << gsl_vector_get(se_enrich, i) << " ";
1455   }
1456   cout << endl;
1457 
1458   // Save data.
1459   v_pve.clear();
1460   v_se_pve.clear();
1461   v_sigma2.clear();
1462   v_se_sigma2.clear();
1463   v_enrich.clear();
1464   v_se_enrich.clear();
1465   for (size_t i = 0; i < n_vc; i++) {
1466     d = gsl_vector_get(pve, i);
1467     v_pve.push_back(d);
1468     d = gsl_vector_get(se_pve, i);
1469     v_se_pve.push_back(d);
1470 
1471     d = gsl_vector_get(sigma2persnp, i);
1472     v_sigma2.push_back(d);
1473     d = gsl_vector_get(se_sigma2persnp, i);
1474     v_se_sigma2.push_back(d);
1475 
1476     d = gsl_vector_get(enrich, i);
1477     v_enrich.push_back(d);
1478     d = gsl_vector_get(se_enrich, i);
1479     v_se_enrich.push_back(d);
1480   }
1481 
1482   // Delete matrices.
1483   gsl_matrix_free(Si_mat);
1484   gsl_matrix_free(Var_mat);
1485   gsl_matrix_free(VarEnrich_mat);
1486   gsl_matrix_free(tmp_mat);
1487   gsl_matrix_free(tmp_mat1);
1488   gsl_matrix_free(qvar_mat);
1489 
1490   gsl_vector_free(pve);
1491   gsl_vector_free(pve_plus);
1492   gsl_vector_free(tmp);
1493   gsl_vector_free(sigma2persnp);
1494   gsl_vector_free(enrich);
1495   gsl_vector_free(se_pve);
1496   gsl_vector_free(se_sigma2persnp);
1497   gsl_vector_free(se_enrich);
1498 
1499   return;
1500 }
1501 
1502 // Ks are not scaled.
CalcVChe(const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1503 void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W,
1504                   const gsl_vector *y) {
1505   size_t n1 = K->size1, n2 = K->size2;
1506   size_t n_vc = n2 / n1;
1507 
1508   double r = (double)n1 / (double)(n1 - W->size2);
1509   double var_y, var_y_new;
1510   double d, tr, s, v;
1511   vector<double> traceG_new;
1512 
1513   // New matrices/vectors.
1514   gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
1515   gsl_vector *y_scale = gsl_vector_alloc(n1);
1516   gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc);
1517   gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1));
1518   gsl_vector *KKry = gsl_vector_alloc(n1);
1519 
1520   // Old matrices/vectors.
1521   gsl_vector *pve = gsl_vector_alloc(n_vc);
1522   gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1523   gsl_vector *q_vec = gsl_vector_alloc(n_vc);
1524   gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
1525   gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
1526   gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
1527   gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1528   gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1529 
1530   // Center and scale K by W.
1531   for (size_t i = 0; i < n_vc; i++) {
1532     gsl_matrix_view Kscale_sub =
1533         gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
1534     gsl_matrix_const_view K_sub =
1535         gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
1536     gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
1537 
1538     CenterMatrix(&Kscale_sub.matrix, W);
1539     d = ScaleMatrix(&Kscale_sub.matrix);
1540     traceG_new.push_back(d);
1541   }
1542 
1543   // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1).
1544   gsl_vector_memcpy(y_scale, y);
1545   CenterVector(y_scale, W);
1546 
1547   var_y = VectorVar(y);
1548   var_y_new = VectorVar(y_scale);
1549 
1550   StandardizeVector(y_scale);
1551 
1552   // Compute Kry, which is used for confidence interval; also compute
1553   // q_vec (*n^2).
1554   for (size_t i = 0; i < n_vc; i++) {
1555     gsl_matrix_const_view Kscale_sub =
1556         gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
1557     gsl_vector_view Kry_col = gsl_matrix_column(Kry, i);
1558 
1559     gsl_vector_memcpy(&Kry_col.vector, y_scale);
1560     gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0 * r,
1561                    &Kry_col.vector);
1562 
1563     gsl_blas_ddot(&Kry_col.vector, y_scale, &d);
1564     gsl_vector_set(q_vec, i, d);
1565   }
1566 
1567   // Compute yKrKKry, which is used later for confidence interval.
1568   for (size_t i = 0; i < n_vc; i++) {
1569     gsl_vector_const_view Kry_coli = gsl_matrix_const_column(Kry, i);
1570     for (size_t j = i; j < n_vc; j++) {
1571       gsl_vector_const_view Kry_colj = gsl_matrix_const_column(Kry, j);
1572       for (size_t l = 0; l < n_vc; l++) {
1573         gsl_matrix_const_view Kscale_sub =
1574             gsl_matrix_const_submatrix(K_scale, 0, n1 * l, n1, n1);
1575         gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector,
1576                        0.0, KKry);
1577         gsl_blas_ddot(&Kry_colj.vector, KKry, &d);
1578         gsl_matrix_set(yKrKKry, i, l * n_vc + j, d);
1579         if (i != j) {
1580           gsl_matrix_set(yKrKKry, j, l * n_vc + i, d);
1581         }
1582       }
1583       gsl_blas_ddot(&Kry_coli.vector, &Kry_colj.vector, &d);
1584       gsl_matrix_set(yKrKKry, i, n_vc * n_vc + j, d);
1585       if (i != j) {
1586         gsl_matrix_set(yKrKKry, j, n_vc * n_vc + i, d);
1587       }
1588     }
1589   }
1590 
1591   // Compute Sij (*n^2).
1592   for (size_t i = 0; i < n_vc; i++) {
1593     for (size_t j = i; j < n_vc; j++) {
1594       tr = 0;
1595       for (size_t l = 0; l < n1; l++) {
1596         gsl_vector_const_view Ki_col =
1597             gsl_matrix_const_column(K_scale, i * n1 + l);
1598         gsl_vector_const_view Kj_col =
1599             gsl_matrix_const_column(K_scale, j * n1 + l);
1600         gsl_blas_ddot(&Ki_col.vector, &Kj_col.vector, &d);
1601         tr += d;
1602       }
1603 
1604       tr = tr - r * (double)n1;
1605       gsl_matrix_set(S_mat, i, j, tr);
1606       if (i != j) {
1607         gsl_matrix_set(S_mat, j, i, tr);
1608       }
1609     }
1610   }
1611 
1612   // Compute S^{-1}q.
1613   int sig;
1614   gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1615   LUDecomp(S_mat, pmt, &sig);
1616   LUInvert(S_mat, pmt, Si_mat);
1617 
1618   // Compute pve (on the transformed scale).
1619   gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
1620 
1621   // Compute q_var (*n^4).
1622   gsl_matrix_set_zero(qvar_mat);
1623   s = 1;
1624   for (size_t i = 0; i < n_vc; i++) {
1625     d = gsl_vector_get(pve, i);
1626     gsl_matrix_view yKrKKry_sub =
1627         gsl_matrix_submatrix(yKrKKry, 0, i * n_vc, n_vc, n_vc);
1628     gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
1629     gsl_matrix_scale(tmp_mat, d);
1630     gsl_matrix_add(qvar_mat, tmp_mat);
1631     s -= d;
1632   }
1633   gsl_matrix_view yKrKKry_sub =
1634       gsl_matrix_submatrix(yKrKKry, 0, n_vc * n_vc, n_vc, n_vc);
1635   gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
1636   gsl_matrix_scale(tmp_mat, s);
1637   gsl_matrix_add(qvar_mat, tmp_mat);
1638 
1639   gsl_matrix_scale(qvar_mat, 2.0);
1640 
1641   // Compute S^{-1}var_qS^{-1}.
1642   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0,
1643                  tmp_mat);
1644   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
1645                  Var_mat);
1646 
1647   // Transform pve back to the original scale and save data.
1648   v_pve.clear();
1649   v_se_pve.clear();
1650   v_sigma2.clear();
1651   v_se_sigma2.clear();
1652 
1653   s = 1.0, v = 0, pve_total = 0, se_pve_total = 0;
1654   for (size_t i = 0; i < n_vc; i++) {
1655     d = gsl_vector_get(pve, i);
1656     v_sigma2.push_back(d * var_y_new / traceG_new[i]);
1657     v_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
1658     s -= d;
1659     pve_total += d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y);
1660 
1661     d = sqrt(gsl_matrix_get(Var_mat, i, i));
1662     v_se_sigma2.push_back(d * var_y_new / traceG_new[i]);
1663     v_se_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
1664 
1665     for (size_t j = 0; j < n_vc; j++) {
1666       v += gsl_matrix_get(Var_mat, i, j);
1667       se_pve_total += gsl_matrix_get(Var_mat, i, j) *
1668                       (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y) *
1669                       (var_y_new / traceG_new[j]) * (v_traceG[j] / var_y);
1670     }
1671   }
1672   v_sigma2.push_back(s * r * var_y_new);
1673   v_se_sigma2.push_back(sqrt(v) * r * var_y_new);
1674   se_pve_total = sqrt(se_pve_total);
1675 
1676   cout << "sigma2 = ";
1677   for (size_t i = 0; i < n_vc + 1; i++) {
1678     cout << v_sigma2[i] << " ";
1679   }
1680   cout << endl;
1681 
1682   cout << "se(sigma2) = ";
1683   for (size_t i = 0; i < n_vc + 1; i++) {
1684     cout << v_se_sigma2[i] << " ";
1685   }
1686   cout << endl;
1687 
1688   cout << "pve = ";
1689   for (size_t i = 0; i < n_vc; i++) {
1690     cout << v_pve[i] << " ";
1691   }
1692   cout << endl;
1693 
1694   cout << "se(pve) = ";
1695   for (size_t i = 0; i < n_vc; i++) {
1696     cout << v_se_pve[i] << " ";
1697   }
1698   cout << endl;
1699 
1700   if (n_vc > 1) {
1701     cout << "total pve = " << pve_total << endl;
1702     cout << "se(total pve) = " << se_pve_total << endl;
1703   }
1704 
1705   gsl_permutation_free(pmt);
1706   gsl_matrix_free(K_scale);
1707   gsl_vector_free(y_scale);
1708   gsl_matrix_free(Kry);
1709   gsl_matrix_free(yKrKKry);
1710   gsl_vector_free(KKry);
1711 
1712   // Old matrices/vectors.
1713   gsl_vector_free(pve);
1714   gsl_vector_free(se_pve);
1715   gsl_vector_free(q_vec);
1716   gsl_matrix_free(qvar_mat);
1717   gsl_matrix_free(tmp_mat);
1718   gsl_matrix_free(S_mat);
1719   gsl_matrix_free(Si_mat);
1720   gsl_matrix_free(Var_mat);
1721 
1722   return;
1723 }
1724 
1725 // REML for log(sigma2) based on the AI algorithm.
CalcVCreml(bool noconstrain,const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1726 void VC::CalcVCreml(bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
1727                     const gsl_vector *y) {
1728   size_t n1 = K->size1, n2 = K->size2;
1729   size_t n_vc = n2 / n1;
1730   gsl_vector *log_sigma2 = gsl_vector_alloc(n_vc + 1);
1731   double d, s;
1732 
1733   // Set up params.
1734   gsl_matrix *P = gsl_matrix_alloc(n1, n1);
1735   gsl_vector *Py = gsl_vector_alloc(n1);
1736   gsl_matrix *KPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
1737   gsl_matrix *PKPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
1738   gsl_vector *dev1 = gsl_vector_alloc(n_vc + 1);
1739   gsl_matrix *dev2 = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
1740   gsl_matrix *Hessian = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
1741   VC_PARAM params = {K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
1742 
1743   // Initialize sigma2/log_sigma2.
1744   CalcVChe(K, W, y);
1745 
1746   gsl_blas_ddot(y, y, &s);
1747   s /= (double)n1;
1748   for (size_t i = 0; i < n_vc + 1; i++) {
1749     if (noconstrain) {
1750       d = v_sigma2[i];
1751     } else {
1752       if (v_sigma2[i] <= 0) {
1753         d = log(0.1);
1754       } else {
1755         d = log(v_sigma2[i]);
1756       }
1757     }
1758     gsl_vector_set(log_sigma2, i, d);
1759   }
1760 
1761   cout << "iteration " << 0 << endl;
1762   cout << "sigma2 = ";
1763   for (size_t i = 0; i < n_vc + 1; i++) {
1764     if (noconstrain) {
1765       cout << gsl_vector_get(log_sigma2, i) << " ";
1766     } else {
1767       cout << exp(gsl_vector_get(log_sigma2, i)) << " ";
1768     }
1769   }
1770   cout << endl;
1771 
1772   // Set up fdf.
1773   gsl_multiroot_function_fdf FDF;
1774   FDF.n = n_vc + 1;
1775   FDF.params = &params;
1776   FDF.f = &LogRL_dev1;
1777   FDF.df = &LogRL_dev2;
1778   FDF.fdf = &LogRL_dev12;
1779 
1780   // Set up solver.
1781   int status;
1782   int iter = 0, max_iter = 100;
1783 
1784   const gsl_multiroot_fdfsolver_type *T_fdf;
1785   gsl_multiroot_fdfsolver *s_fdf;
1786   T_fdf = gsl_multiroot_fdfsolver_hybridsj;
1787   s_fdf = gsl_multiroot_fdfsolver_alloc(T_fdf, n_vc + 1);
1788 
1789   gsl_multiroot_fdfsolver_set(s_fdf, &FDF, log_sigma2);
1790 
1791   do {
1792     iter++;
1793     status = gsl_multiroot_fdfsolver_iterate(s_fdf);
1794 
1795     if (status)
1796       break;
1797 
1798     cout << "iteration " << iter << endl;
1799     cout << "sigma2 = ";
1800     for (size_t i = 0; i < n_vc + 1; i++) {
1801       if (noconstrain) {
1802         cout << gsl_vector_get(s_fdf->x, i) << " ";
1803       } else {
1804         cout << exp(gsl_vector_get(s_fdf->x, i)) << " ";
1805       }
1806     }
1807     cout << endl;
1808     status = gsl_multiroot_test_residual(s_fdf->f, 1e-3);
1809   } while (status == GSL_CONTINUE && iter < max_iter);
1810 
1811   // Obtain Hessian and Hessian inverse.
1812   int sig = LogRL_dev12(s_fdf->x, &params, dev1, dev2);
1813 
1814   gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1);
1815   LUDecomp(dev2, pmt, &sig);
1816   LUInvert(dev2, pmt, Hessian);
1817   gsl_permutation_free(pmt);
1818 
1819   // Save sigma2 and se_sigma2.
1820   v_sigma2.clear();
1821   v_se_sigma2.clear();
1822   for (size_t i = 0; i < n_vc + 1; i++) {
1823     if (noconstrain) {
1824       d = gsl_vector_get(s_fdf->x, i);
1825     } else {
1826       d = exp(gsl_vector_get(s_fdf->x, i));
1827     }
1828     v_sigma2.push_back(d);
1829 
1830     if (noconstrain) {
1831       d = -1.0 * gsl_matrix_get(Hessian, i, i);
1832     } else {
1833       d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i);
1834     }
1835     v_se_sigma2.push_back(sqrt(d));
1836   }
1837 
1838   s = 0;
1839   for (size_t i = 0; i < n_vc; i++) {
1840     s += v_traceG[i] * v_sigma2[i];
1841   }
1842   s += v_sigma2[n_vc];
1843 
1844   // Compute pve.
1845   v_pve.clear();
1846   pve_total = 0;
1847   for (size_t i = 0; i < n_vc; i++) {
1848     d = v_traceG[i] * v_sigma2[i] / s;
1849     v_pve.push_back(d);
1850     pve_total += d;
1851   }
1852 
1853   // Compute se_pve; k=n_vc+1: total.
1854   double d1, d2;
1855   v_se_pve.clear();
1856   se_pve_total = 0;
1857   for (size_t k = 0; k < n_vc + 1; k++) {
1858     d = 0;
1859     for (size_t i = 0; i < n_vc + 1; i++) {
1860       if (noconstrain) {
1861         d1 = gsl_vector_get(s_fdf->x, i);
1862         d1 = 1;
1863       } else {
1864         d1 = exp(gsl_vector_get(s_fdf->x, i));
1865       }
1866 
1867       if (k < n_vc) {
1868         if (i == k) {
1869           d1 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
1870         } else if (i == n_vc) {
1871           d1 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
1872         } else {
1873           d1 *= -1 * v_traceG[i] * v_traceG[k] * v_sigma2[k] / (s * s);
1874         }
1875       } else {
1876         if (i == k) {
1877           d1 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
1878         } else {
1879           d1 *= v_traceG[i] * v_sigma2[n_vc] / (s * s);
1880         }
1881       }
1882 
1883       for (size_t j = 0; j < n_vc + 1; j++) {
1884         if (noconstrain) {
1885           d2 = gsl_vector_get(s_fdf->x, j);
1886           d2 = 1;
1887         } else {
1888           d2 = exp(gsl_vector_get(s_fdf->x, j));
1889         }
1890 
1891         if (k < n_vc) {
1892           if (j == k) {
1893             d2 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
1894           } else if (j == n_vc) {
1895             d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
1896           } else {
1897             d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s);
1898           }
1899         } else {
1900           if (j == k) {
1901             d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
1902           } else {
1903             d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s);
1904           }
1905         }
1906 
1907         d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j);
1908       }
1909     }
1910 
1911     if (k < n_vc) {
1912       v_se_pve.push_back(sqrt(d));
1913     } else {
1914       se_pve_total = sqrt(d);
1915     }
1916   }
1917 
1918   gsl_multiroot_fdfsolver_free(s_fdf);
1919 
1920   gsl_vector_free(log_sigma2);
1921   gsl_matrix_free(P);
1922   gsl_vector_free(Py);
1923   gsl_matrix_free(KPy_mat);
1924   gsl_matrix_free(PKPy_mat);
1925   gsl_vector_free(dev1);
1926   gsl_matrix_free(dev2);
1927   gsl_matrix_free(Hessian);
1928 
1929   return;
1930 }
1931 
1932 // Ks are not scaled.
CalcVCacl(const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1933 void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
1934                    const gsl_vector *y) {
1935   size_t n1 = K->size1, n2 = K->size2;
1936   size_t n_vc = n2 / n1;
1937 
1938   double d, y2_sum;
1939 
1940   // New matrices/vectors.
1941   gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
1942   gsl_vector *y_scale = gsl_vector_alloc(n1);
1943   gsl_vector *y2 = gsl_vector_alloc(n1);
1944   gsl_vector *n1_vec = gsl_vector_alloc(n1);
1945   gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc);
1946   gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc);
1947   gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1);
1948   gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1);
1949 
1950   // Old matrices/vectors.
1951   gsl_vector *pve = gsl_vector_alloc(n_vc);
1952   gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1953   gsl_vector *q_vec = gsl_vector_alloc(n_vc);
1954   gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc);
1955   gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc);
1956   gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
1957   gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1958   gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc);
1959   gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1960 
1961   int sig;
1962   gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1963 
1964   // Center and scale K by W, and standardize K further so that all
1965   // diagonal elements are 1
1966   for (size_t i = 0; i < n_vc; i++) {
1967     gsl_matrix_view Kscale_sub =
1968         gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
1969     gsl_matrix_const_view K_sub =
1970         gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
1971     gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
1972 
1973     CenterMatrix(&Kscale_sub.matrix, W);
1974     StandardizeMatrix(&Kscale_sub.matrix);
1975   }
1976 
1977   // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
1978   gsl_vector_memcpy(y_scale, y);
1979   CenterVector(y_scale, W);
1980 
1981   // Compute y^2 and sum(y^2), which is also the variance of y*n1.
1982   gsl_vector_memcpy(y2, y_scale);
1983   gsl_vector_mul(y2, y_scale);
1984 
1985   y2_sum = 0;
1986   for (size_t i = 0; i < y2->size; i++) {
1987     y2_sum += gsl_vector_get(y2, i);
1988   }
1989 
1990   // Compute the n_vc size q vector.
1991   for (size_t i = 0; i < n_vc; i++) {
1992     gsl_matrix_const_view Kscale_sub =
1993         gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
1994 
1995     gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
1996 
1997     gsl_blas_ddot(n1_vec, y_scale, &d);
1998     gsl_vector_set(q_vec, i, d - y2_sum);
1999   }
2000 
2001   // Compute the n_vc by n_vc S1 and S2 matrix (and eventually
2002   // S=S1-\tau^{-1}S2).
2003   for (size_t i = 0; i < n_vc; i++) {
2004     gsl_matrix_const_view Kscale_sub1 =
2005         gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
2006 
2007     for (size_t j = i; j < n_vc; j++) {
2008       gsl_matrix_const_view Kscale_sub2 =
2009           gsl_matrix_const_submatrix(K_scale, 0, n1 * j, n1, n1);
2010 
2011       gsl_matrix_memcpy(K_tmp, &Kscale_sub1.matrix);
2012       gsl_matrix_mul_elements(K_tmp, &Kscale_sub2.matrix);
2013 
2014       gsl_vector_set_zero(n1_vec);
2015       for (size_t t = 0; t < K_tmp->size1; t++) {
2016         gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t);
2017         gsl_vector_add(n1_vec, &Ktmp_col.vector);
2018       }
2019       gsl_vector_add_constant(n1_vec, -1.0);
2020 
2021       // Compute S1.
2022       gsl_blas_ddot(n1_vec, y2, &d);
2023       gsl_matrix_set(S1, i, j, 2 * d);
2024       if (i != j) {
2025         gsl_matrix_set(S1, j, i, 2 * d);
2026       }
2027 
2028       // Compute S2.
2029       d = 0;
2030       for (size_t t = 0; t < n1_vec->size; t++) {
2031         d += gsl_vector_get(n1_vec, t);
2032       }
2033       gsl_matrix_set(S2, i, j, d);
2034       if (i != j) {
2035         gsl_matrix_set(S2, j, i, d);
2036       }
2037 
2038       // Save information to compute J.
2039       gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j);
2040       gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i);
2041 
2042       gsl_vector_memcpy(&K2col1.vector, n1_vec);
2043       if (i != j) {
2044         gsl_vector_memcpy(&K2col2.vector, n1_vec);
2045       }
2046     }
2047   }
2048 
2049   // Iterate to solve tau and h's.
2050   size_t it = 0;
2051   double s = 1;
2052   double tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
2053   while (abs(s) > 1e-3 && it < 100) {
2054 
2055     // Update tau_inv.
2056     gsl_blas_ddot(q_vec, pve, &d);
2057     if (it > 0) {
2058       s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv;
2059     }
2060     tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
2061     if (it > 0) {
2062       s /= tau_inv;
2063     }
2064 
2065     // Update S.
2066     gsl_matrix_memcpy(S_mat, S2);
2067     gsl_matrix_scale(S_mat, -1 * tau_inv);
2068     gsl_matrix_add(S_mat, S1);
2069 
2070     // Update h=S^{-1}q.
2071     int sig;
2072     gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
2073     LUDecomp(S_mat, pmt, &sig);
2074     LUInvert(S_mat, pmt, Si_mat);
2075     gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
2076 
2077     it++;
2078   }
2079 
2080   // Compute V matrix and A matrix (K_scale is destroyed, so need to
2081   // compute V first).
2082   gsl_matrix_set_zero(V_mat);
2083   for (size_t i = 0; i < n_vc; i++) {
2084     gsl_matrix_view Kscale_sub =
2085         gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
2086 
2087     // Compute V.
2088     gsl_matrix_memcpy(K_tmp, &Kscale_sub.matrix);
2089     gsl_matrix_scale(K_tmp, gsl_vector_get(pve, i));
2090     gsl_matrix_add(V_mat, K_tmp);
2091 
2092     // Compute A; the corresponding Kscale is destroyed.
2093     gsl_matrix_const_view K2_sub =
2094         gsl_matrix_const_submatrix(K2, 0, n_vc * i, n1, n_vc);
2095     gsl_blas_dgemv(CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
2096 
2097     for (size_t t = 0; t < n1; t++) {
2098       gsl_matrix_set(K_scale, t, n1 * i + t, gsl_vector_get(n1_vec, t));
2099     }
2100 
2101     // Compute Ay.
2102     gsl_vector_view Ay_col = gsl_matrix_column(Ay, i);
2103     gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0,
2104                    &Ay_col.vector);
2105   }
2106   gsl_matrix_scale(V_mat, tau_inv);
2107 
2108   // Compute J matrix.
2109   for (size_t i = 0; i < n_vc; i++) {
2110     gsl_vector_view Ay_col1 = gsl_matrix_column(Ay, i);
2111     gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
2112 
2113     for (size_t j = i; j < n_vc; j++) {
2114       gsl_vector_view Ay_col2 = gsl_matrix_column(Ay, j);
2115 
2116       gsl_blas_ddot(&Ay_col2.vector, n1_vec, &d);
2117       gsl_matrix_set(J_mat, i, j, 2.0 * d);
2118       if (i != j) {
2119         gsl_matrix_set(J_mat, j, i, 2.0 * d);
2120       }
2121     }
2122   }
2123 
2124   // Compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is
2125   // stored in Var_mat.
2126   gsl_matrix_memcpy(S_mat, S2);
2127   gsl_matrix_scale(S_mat, tau_inv);
2128 
2129   LUDecomp(S_mat, pmt, &sig);
2130   LUInvert(S_mat, pmt, Si_mat);
2131 
2132   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
2133   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
2134 
2135   // Compute variance for tau_inv.
2136   gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
2137   gsl_blas_ddot(y_scale, n1_vec, &d);
2138   // auto se_tau_inv = sqrt(2 * d) / (double)n1;  UNUSED
2139 
2140   // Transform pve back to the original scale and save data.
2141   v_pve.clear();
2142   v_se_pve.clear();
2143   v_sigma2.clear();
2144   v_se_sigma2.clear();
2145 
2146   pve_total = 0, se_pve_total = 0;
2147   for (size_t i = 0; i < n_vc; i++) {
2148     d = gsl_vector_get(pve, i);
2149     pve_total += d;
2150 
2151     v_pve.push_back(d);
2152     v_sigma2.push_back(d * tau_inv / v_traceG[i]);
2153 
2154     d = sqrt(gsl_matrix_get(Var_mat, i, i));
2155     v_se_pve.push_back(d);
2156     v_se_sigma2.push_back(d * tau_inv / v_traceG[i]);
2157 
2158     for (size_t j = 0; j < n_vc; j++) {
2159       se_pve_total += gsl_matrix_get(Var_mat, i, j);
2160     }
2161   }
2162   v_sigma2.push_back((1 - pve_total) * tau_inv);
2163   v_se_sigma2.push_back(sqrt(se_pve_total) * tau_inv);
2164   se_pve_total = sqrt(se_pve_total);
2165 
2166   cout << "sigma2 = ";
2167   for (size_t i = 0; i < n_vc + 1; i++) {
2168     cout << v_sigma2[i] << " ";
2169   }
2170   cout << endl;
2171 
2172   cout << "se(sigma2) = ";
2173   for (size_t i = 0; i < n_vc + 1; i++) {
2174     cout << v_se_sigma2[i] << " ";
2175   }
2176   cout << endl;
2177 
2178   cout << "pve = ";
2179   for (size_t i = 0; i < n_vc; i++) {
2180     cout << v_pve[i] << " ";
2181   }
2182   cout << endl;
2183 
2184   cout << "se(pve) = ";
2185   for (size_t i = 0; i < n_vc; i++) {
2186     cout << v_se_pve[i] << " ";
2187   }
2188   cout << endl;
2189 
2190   if (n_vc > 1) {
2191     cout << "total pve = " << pve_total << endl;
2192     cout << "se(total pve) = " << se_pve_total << endl;
2193   }
2194 
2195   gsl_permutation_free(pmt);
2196 
2197   gsl_matrix_free(K_scale);
2198   gsl_vector_free(y_scale);
2199   gsl_vector_free(y2);
2200   gsl_vector_free(n1_vec);
2201   gsl_matrix_free(Ay);
2202   gsl_matrix_free(K2);
2203   gsl_matrix_free(K_tmp);
2204   gsl_matrix_free(V_mat);
2205 
2206   gsl_vector_free(pve);
2207   gsl_vector_free(se_pve);
2208   gsl_vector_free(q_vec);
2209   gsl_matrix_free(S1);
2210   gsl_matrix_free(S2);
2211   gsl_matrix_free(S_mat);
2212   gsl_matrix_free(Si_mat);
2213   gsl_matrix_free(J_mat);
2214   gsl_matrix_free(Var_mat);
2215 
2216   return;
2217 }
2218 
2219 // Read bimbam mean genotype file and compute XWz.
BimbamXwz(const string & file_geno,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,size_t ns_test,gsl_matrix * XWz)2220 bool BimbamXwz(const string &file_geno, const int display_pace,
2221                vector<int> &indicator_idv, vector<int> &indicator_snp,
2222                const vector<size_t> &vec_cat, const gsl_vector *w,
2223                const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
2224   debug_msg("entering");
2225   igzstream infile(file_geno.c_str(), igzstream::in);
2226   if (!infile) {
2227     cout << "error reading genotype file:" << file_geno << endl;
2228     return false;
2229   }
2230 
2231   string line;
2232   char *ch_ptr;
2233 
2234   size_t n_miss;
2235   double d, geno_mean, geno_var;
2236 
2237   size_t ni_test = XWz->size1;
2238   gsl_vector *geno = gsl_vector_alloc(ni_test);
2239   gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
2240   gsl_vector *wz = gsl_vector_alloc(w->size);
2241   gsl_vector_memcpy(wz, z);
2242   gsl_vector_mul(wz, w);
2243 
2244   for (size_t t = 0; t < indicator_snp.size(); ++t) {
2245     safeGetline(infile, line).eof();
2246     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2247       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
2248     }
2249     if (indicator_snp[t] == 0) {
2250       continue;
2251     }
2252 
2253     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
2254     ch_ptr = strtok_safe(NULL, " , \t");
2255     ch_ptr = strtok_safe(NULL, " , \t");
2256 
2257     geno_mean = 0.0;
2258     n_miss = 0;
2259     geno_var = 0.0;
2260     gsl_vector_set_all(geno_miss, 0);
2261 
2262     size_t j = 0;
2263     for (size_t i = 0; i < indicator_idv.size(); ++i) {
2264       if (indicator_idv[i] == 0) {
2265         continue;
2266       }
2267       ch_ptr = strtok_safe(NULL, " , \t");
2268       if (strcmp(ch_ptr, "NA") == 0) {
2269         gsl_vector_set(geno_miss, i, 0);
2270         n_miss++;
2271       } else {
2272         d = atof(ch_ptr);
2273         gsl_vector_set(geno, j, d);
2274         gsl_vector_set(geno_miss, j, 1);
2275         geno_mean += d;
2276         geno_var += d * d;
2277       }
2278       j++;
2279     }
2280 
2281     geno_mean /= (double)(ni_test - n_miss);
2282     geno_var += geno_mean * geno_mean * (double)n_miss;
2283     geno_var /= (double)ni_test;
2284     geno_var -= geno_mean * geno_mean;
2285 
2286     for (size_t i = 0; i < ni_test; ++i) {
2287       if (gsl_vector_get(geno_miss, i) == 0) {
2288         gsl_vector_set(geno, i, geno_mean);
2289       }
2290     }
2291 
2292     gsl_vector_add_constant(geno, -1.0 * geno_mean);
2293 
2294     gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
2295     d = gsl_vector_get(wz, ns_test);
2296     gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
2297 
2298     ns_test++;
2299   }
2300 
2301   cout << endl;
2302 
2303   gsl_vector_free(geno);
2304   gsl_vector_free(geno_miss);
2305   gsl_vector_free(wz);
2306 
2307   infile.close();
2308   infile.clear();
2309 
2310   return true;
2311 }
2312 
2313 // Read PLINK bed file and compute XWz.
PlinkXwz(const string & file_bed,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,size_t ns_test,gsl_matrix * XWz)2314 bool PlinkXwz(const string &file_bed, const int display_pace,
2315               vector<int> &indicator_idv, vector<int> &indicator_snp,
2316               const vector<size_t> &vec_cat, const gsl_vector *w,
2317               const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
2318   debug_msg("entering");
2319   ifstream infile(file_bed.c_str(), ios::binary);
2320   if (!infile) {
2321     cout << "error reading bed file:" << file_bed << endl;
2322     return false;
2323   }
2324 
2325   char ch[1];
2326   bitset<8> b;
2327 
2328   size_t n_miss, ci_total, ci_test;
2329   double d, geno_mean, geno_var;
2330 
2331   size_t ni_test = XWz->size1;
2332   size_t ni_total = indicator_idv.size();
2333   gsl_vector *geno = gsl_vector_alloc(ni_test);
2334   gsl_vector *wz = gsl_vector_alloc(w->size);
2335   gsl_vector_memcpy(wz, z);
2336   gsl_vector_mul(wz, w);
2337 
2338   int n_bit;
2339 
2340   // Calculate n_bit and c, the number of bit for each snp.
2341   if (ni_total % 4 == 0) {
2342     n_bit = ni_total / 4;
2343   } else {
2344     n_bit = ni_total / 4 + 1;
2345   }
2346 
2347   // Print the first three magic numbers.
2348   for (int i = 0; i < 3; ++i) {
2349     infile.read(ch, 1);
2350     b = ch[0];
2351   }
2352 
2353   for (size_t t = 0; t < indicator_snp.size(); ++t) {
2354     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2355       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
2356     }
2357     if (indicator_snp[t] == 0) {
2358       continue;
2359     }
2360 
2361     // n_bit, and 3 is the number of magic numbers.
2362     infile.seekg(t * n_bit + 3);
2363 
2364     // Read genotypes.
2365     geno_mean = 0.0;
2366     n_miss = 0;
2367     ci_total = 0;
2368     geno_var = 0.0;
2369     ci_test = 0;
2370     for (int i = 0; i < n_bit; ++i) {
2371       infile.read(ch, 1);
2372       b = ch[0];
2373 
2374       // Minor allele homozygous: 2.0; major: 0.0.
2375       for (size_t j = 0; j < 4; ++j) {
2376         if ((i == (n_bit - 1)) && ci_total == ni_total) {
2377           break;
2378         }
2379         if (indicator_idv[ci_total] == 0) {
2380           ci_total++;
2381           continue;
2382         }
2383 
2384         if (b[2 * j] == 0) {
2385           if (b[2 * j + 1] == 0) {
2386             gsl_vector_set(geno, ci_test, 2.0);
2387             geno_mean += 2.0;
2388             geno_var += 4.0;
2389           } else {
2390             gsl_vector_set(geno, ci_test, 1.0);
2391             geno_mean += 1.0;
2392             geno_var += 1.0;
2393           }
2394         } else {
2395           if (b[2 * j + 1] == 1) {
2396             gsl_vector_set(geno, ci_test, 0.0);
2397           } else {
2398             gsl_vector_set(geno, ci_test, -9.0);
2399             n_miss++;
2400           }
2401         }
2402 
2403         ci_test++;
2404         ci_total++;
2405       }
2406     }
2407 
2408     geno_mean /= (double)(ni_test - n_miss);
2409     geno_var += geno_mean * geno_mean * (double)n_miss;
2410     geno_var /= (double)ni_test;
2411     geno_var -= geno_mean * geno_mean;
2412 
2413     for (size_t i = 0; i < ni_test; ++i) {
2414       d = gsl_vector_get(geno, i);
2415       if (d == -9.0) {
2416         gsl_vector_set(geno, i, geno_mean);
2417       }
2418     }
2419 
2420     gsl_vector_add_constant(geno, -1.0 * geno_mean);
2421 
2422     gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
2423     d = gsl_vector_get(wz, ns_test);
2424     gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
2425 
2426     ns_test++;
2427   }
2428   cout << endl;
2429 
2430   gsl_vector_free(geno);
2431   gsl_vector_free(wz);
2432 
2433   infile.close();
2434   infile.clear();
2435 
2436   return true;
2437 }
2438 
2439 // Read multiple genotype files and compute XWz.
MFILEXwz(const size_t mfile_mode,const string & file_mfile,const int display_pace,vector<int> & indicator_idv,vector<vector<int>> & mindicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,gsl_matrix * XWz)2440 bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
2441               const int display_pace, vector<int> &indicator_idv,
2442               vector<vector<int>> &mindicator_snp,
2443               const vector<size_t> &vec_cat, const gsl_vector *w,
2444               const gsl_vector *z, gsl_matrix *XWz) {
2445   debug_msg("entering");
2446   gsl_matrix_set_zero(XWz);
2447 
2448   igzstream infile(file_mfile.c_str(), igzstream::in);
2449   if (!infile) {
2450     cout << "error! fail to open mfile file: " << file_mfile << endl;
2451     return false;
2452   }
2453 
2454   string file_name;
2455   size_t l = 0, ns_test = 0;
2456 
2457   while (!safeGetline(infile, file_name).eof()) {
2458     if (mfile_mode == 1) {
2459       file_name += ".bed";
2460       PlinkXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2461                vec_cat, w, z, ns_test, XWz);
2462     } else {
2463       BimbamXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2464                 vec_cat, w, z, ns_test, XWz);
2465     }
2466 
2467     l++;
2468   }
2469 
2470   infile.close();
2471   infile.clear();
2472 
2473   return true;
2474 }
2475 
2476 // Read bimbam mean genotype file and compute X_i^TX_jWz.
BimbamXtXwz(const string & file_geno,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const gsl_matrix * XWz,size_t ns_test,gsl_matrix * XtXWz)2477 bool BimbamXtXwz(const string &file_geno, const int display_pace,
2478                  vector<int> &indicator_idv, vector<int> &indicator_snp,
2479                  const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
2480   debug_msg("entering");
2481   igzstream infile(file_geno.c_str(), igzstream::in);
2482   if (!infile) {
2483     cout << "error reading genotype file:" << file_geno << endl;
2484     return false;
2485   }
2486 
2487   string line;
2488   char *ch_ptr;
2489 
2490   size_t n_miss;
2491   double d, geno_mean, geno_var;
2492 
2493   size_t ni_test = XWz->size1;
2494   gsl_vector *geno = gsl_vector_alloc(ni_test);
2495   gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
2496 
2497   for (size_t t = 0; t < indicator_snp.size(); ++t) {
2498     safeGetline(infile, line).eof();
2499     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2500       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
2501     }
2502     if (indicator_snp[t] == 0) {
2503       continue;
2504     }
2505 
2506     ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
2507     ch_ptr = strtok_safe(NULL, " , \t");
2508     ch_ptr = strtok_safe(NULL, " , \t");
2509 
2510     geno_mean = 0.0;
2511     n_miss = 0;
2512     geno_var = 0.0;
2513     gsl_vector_set_all(geno_miss, 0);
2514 
2515     size_t j = 0;
2516     for (size_t i = 0; i < indicator_idv.size(); ++i) {
2517       if (indicator_idv[i] == 0) {
2518         continue;
2519       }
2520       ch_ptr = strtok_safe(NULL, " , \t");
2521       if (strcmp(ch_ptr, "NA") == 0) {
2522         gsl_vector_set(geno_miss, i, 0);
2523         n_miss++;
2524       } else {
2525         d = atof(ch_ptr);
2526         gsl_vector_set(geno, j, d);
2527         gsl_vector_set(geno_miss, j, 1);
2528         geno_mean += d;
2529         geno_var += d * d;
2530       }
2531       j++;
2532     }
2533 
2534     geno_mean /= (double)(ni_test - n_miss);
2535     geno_var += geno_mean * geno_mean * (double)n_miss;
2536     geno_var /= (double)ni_test;
2537     geno_var -= geno_mean * geno_mean;
2538 
2539     for (size_t i = 0; i < ni_test; ++i) {
2540       if (gsl_vector_get(geno_miss, i) == 0) {
2541         gsl_vector_set(geno, i, geno_mean);
2542       }
2543     }
2544 
2545     gsl_vector_add_constant(geno, -1.0 * geno_mean);
2546 
2547     for (size_t i = 0; i < XWz->size2; i++) {
2548       gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
2549       gsl_blas_ddot(geno, &XWz_col.vector, &d);
2550       gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
2551     }
2552 
2553     ns_test++;
2554   }
2555 
2556   cout << endl;
2557 
2558   gsl_vector_free(geno);
2559   gsl_vector_free(geno_miss);
2560 
2561   infile.close();
2562   infile.clear();
2563 
2564   return true;
2565 }
2566 
2567 // Read PLINK bed file and compute XWz.
PlinkXtXwz(const string & file_bed,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const gsl_matrix * XWz,size_t ns_test,gsl_matrix * XtXWz)2568 bool PlinkXtXwz(const string &file_bed, const int display_pace,
2569                 vector<int> &indicator_idv, vector<int> &indicator_snp,
2570                 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
2571   debug_msg("entering");
2572   ifstream infile(file_bed.c_str(), ios::binary);
2573   if (!infile) {
2574     cout << "error reading bed file:" << file_bed << endl;
2575     return false;
2576   }
2577 
2578   char ch[1];
2579   bitset<8> b;
2580 
2581   size_t n_miss, ci_total, ci_test;
2582   double d, geno_mean, geno_var;
2583 
2584   size_t ni_test = XWz->size1;
2585   size_t ni_total = indicator_idv.size();
2586   gsl_vector *geno = gsl_vector_alloc(ni_test);
2587 
2588   int n_bit;
2589 
2590   // Calculate n_bit and c, the number of bit for each snp.
2591   if (ni_total % 4 == 0) {
2592     n_bit = ni_total / 4;
2593   } else {
2594     n_bit = ni_total / 4 + 1;
2595   }
2596 
2597   // Print the first three magic numbers.
2598   for (int i = 0; i < 3; ++i) {
2599     infile.read(ch, 1);
2600     b = ch[0];
2601   }
2602 
2603   for (size_t t = 0; t < indicator_snp.size(); ++t) {
2604     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2605       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
2606     }
2607     if (indicator_snp[t] == 0) {
2608       continue;
2609     }
2610 
2611     // n_bit, and 3 is the number of magic numbers.
2612     infile.seekg(t * n_bit + 3);
2613 
2614     // Read genotypes.
2615     geno_mean = 0.0;
2616     n_miss = 0;
2617     ci_total = 0;
2618     geno_var = 0.0;
2619     ci_test = 0;
2620     for (int i = 0; i < n_bit; ++i) {
2621       infile.read(ch, 1);
2622       b = ch[0];
2623 
2624       // Minor allele homozygous: 2.0; major: 0.0;
2625       for (size_t j = 0; j < 4; ++j) {
2626         if ((i == (n_bit - 1)) && ci_total == ni_total) {
2627           break;
2628         }
2629         if (indicator_idv[ci_total] == 0) {
2630           ci_total++;
2631           continue;
2632         }
2633 
2634         if (b[2 * j] == 0) {
2635           if (b[2 * j + 1] == 0) {
2636             gsl_vector_set(geno, ci_test, 2.0);
2637             geno_mean += 2.0;
2638             geno_var += 4.0;
2639           } else {
2640             gsl_vector_set(geno, ci_test, 1.0);
2641             geno_mean += 1.0;
2642             geno_var += 1.0;
2643           }
2644         } else {
2645           if (b[2 * j + 1] == 1) {
2646             gsl_vector_set(geno, ci_test, 0.0);
2647           } else {
2648             gsl_vector_set(geno, ci_test, -9.0);
2649             n_miss++;
2650           }
2651         }
2652 
2653         ci_test++;
2654         ci_total++;
2655       }
2656     }
2657 
2658     geno_mean /= (double)(ni_test - n_miss);
2659     geno_var += geno_mean * geno_mean * (double)n_miss;
2660     geno_var /= (double)ni_test;
2661     geno_var -= geno_mean * geno_mean;
2662 
2663     for (size_t i = 0; i < ni_test; ++i) {
2664       d = gsl_vector_get(geno, i);
2665       if (d == -9.0) {
2666         gsl_vector_set(geno, i, geno_mean);
2667       }
2668     }
2669 
2670     gsl_vector_add_constant(geno, -1.0 * geno_mean);
2671 
2672     for (size_t i = 0; i < XWz->size2; i++) {
2673       gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
2674       gsl_blas_ddot(geno, &XWz_col.vector, &d);
2675       gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
2676     }
2677 
2678     ns_test++;
2679   }
2680   cout << endl;
2681 
2682   gsl_vector_free(geno);
2683 
2684   infile.close();
2685   infile.clear();
2686 
2687   return true;
2688 }
2689 
2690 // Read multiple genotype files and compute XWz.
MFILEXtXwz(const size_t mfile_mode,const string & file_mfile,const int display_pace,vector<int> & indicator_idv,vector<vector<int>> & mindicator_snp,const gsl_matrix * XWz,gsl_matrix * XtXWz)2691 bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
2692                 const int display_pace, vector<int> &indicator_idv,
2693                 vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
2694                 gsl_matrix *XtXWz) {
2695   debug_msg("entering");
2696   gsl_matrix_set_zero(XtXWz);
2697 
2698   igzstream infile(file_mfile.c_str(), igzstream::in);
2699   if (!infile) {
2700     cout << "error! fail to open mfile file: " << file_mfile << endl;
2701     return false;
2702   }
2703 
2704   string file_name;
2705   size_t l = 0, ns_test = 0;
2706 
2707   while (!safeGetline(infile, file_name).eof()) {
2708     if (mfile_mode == 1) {
2709       file_name += ".bed";
2710       PlinkXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], XWz,
2711                  ns_test, XtXWz);
2712     } else {
2713       BimbamXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2714                   XWz, ns_test, XtXWz);
2715     }
2716 
2717     l++;
2718   }
2719 
2720   infile.close();
2721   infile.clear();
2722 
2723   return true;
2724 }
2725 
2726 // Compute confidence intervals from summary statistics.
CalcCIss(const gsl_matrix * Xz,const gsl_matrix * XWz,const gsl_matrix * XtXWz,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat,const gsl_vector * w,const gsl_vector * z,const gsl_vector * s_vec,const vector<size_t> & vec_cat,const vector<double> & v_pve,vector<double> & v_se_pve,double & pve_total,double & se_pve_total,vector<double> & v_sigma2,vector<double> & v_se_sigma2,vector<double> & v_enrich,vector<double> & v_se_enrich)2727 void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
2728               const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
2729               const gsl_matrix *Svar_mat, const gsl_vector *w,
2730               const gsl_vector *z, const gsl_vector *s_vec,
2731               const vector<size_t> &vec_cat, const vector<double> &v_pve,
2732               vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
2733               vector<double> &v_sigma2, vector<double> &v_se_sigma2,
2734               vector<double> &v_enrich, vector<double> &v_se_enrich) {
2735   size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1;
2736 
2737   // Set up matrices.
2738   gsl_vector *w_pve = gsl_vector_alloc(ns_test);
2739   gsl_vector *wz = gsl_vector_alloc(ns_test);
2740   gsl_vector *zwz = gsl_vector_alloc(n_vc);
2741   gsl_vector *zz = gsl_vector_alloc(n_vc);
2742   gsl_vector *Xz_pve = gsl_vector_alloc(ni_test);
2743   gsl_vector *WXtXWz = gsl_vector_alloc(ns_test);
2744 
2745   gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
2746   gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
2747   gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
2748   gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
2749   gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
2750   gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
2751 
2752   double d, s0, s1, s, s_pve, s_snp;
2753 
2754   // Compute wz and zwz.
2755   gsl_vector_memcpy(wz, z);
2756   gsl_vector_mul(wz, w);
2757 
2758   gsl_vector_set_zero(zwz);
2759   gsl_vector_set_zero(zz);
2760   for (size_t i = 0; i < w->size; i++) {
2761     d = gsl_vector_get(wz, i) * gsl_vector_get(z, i);
2762     d += gsl_vector_get(zwz, vec_cat[i]);
2763     gsl_vector_set(zwz, vec_cat[i], d);
2764 
2765     d = gsl_vector_get(z, i) * gsl_vector_get(z, i);
2766     d += gsl_vector_get(zz, vec_cat[i]);
2767     gsl_vector_set(zz, vec_cat[i], d);
2768   }
2769 
2770   // Compute wz, ve and Xz_pve.
2771   gsl_vector_set_zero(Xz_pve);
2772   s_pve = 0;
2773   s_snp = 0;
2774   for (size_t i = 0; i < n_vc; i++) {
2775     s_pve += v_pve[i];
2776     s_snp += gsl_vector_get(s_vec, i);
2777 
2778     gsl_vector_const_view Xz_col = gsl_matrix_const_column(Xz, i);
2779     gsl_blas_daxpy(v_pve[i] / gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
2780   }
2781 
2782   // Set up wpve vector.
2783   for (size_t i = 0; i < w->size; i++) {
2784     d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]);
2785     gsl_vector_set(w_pve, i, d);
2786   }
2787 
2788   // Compute Vq (in qvar_mat).
2789   s0 = 1 - s_pve;
2790   for (size_t i = 0; i < n_vc; i++) {
2791     s0 += gsl_vector_get(zz, i) * v_pve[i] / gsl_vector_get(s_vec, i);
2792   }
2793 
2794   for (size_t i = 0; i < n_vc; i++) {
2795     s1 = s0;
2796     s1 -= gsl_vector_get(zwz, i) * (1 - s_pve) / gsl_vector_get(s_vec, i);
2797 
2798     gsl_vector_const_view XWz_col1 = gsl_matrix_const_column(XWz, i);
2799     gsl_vector_const_view XtXWz_col1 = gsl_matrix_const_column(XtXWz, i);
2800 
2801     gsl_vector_memcpy(WXtXWz, &XtXWz_col1.vector);
2802     gsl_vector_mul(WXtXWz, w_pve);
2803 
2804     gsl_blas_ddot(Xz_pve, &XWz_col1.vector, &d);
2805     s1 -= d / gsl_vector_get(s_vec, i);
2806 
2807     for (size_t j = 0; j < n_vc; j++) {
2808       s = s1;
2809 
2810       s -= gsl_vector_get(zwz, j) * (1 - s_pve) / gsl_vector_get(s_vec, j);
2811 
2812       gsl_vector_const_view XWz_col2 = gsl_matrix_const_column(XWz, j);
2813       gsl_vector_const_view XtXWz_col2 = gsl_matrix_const_column(XtXWz, j);
2814 
2815       gsl_blas_ddot(WXtXWz, &XtXWz_col2.vector, &d);
2816       s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j));
2817 
2818       gsl_blas_ddot(&XWz_col1.vector, &XWz_col2.vector, &d);
2819       s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)) *
2820            (1 - s_pve);
2821 
2822       gsl_blas_ddot(Xz_pve, &XWz_col2.vector, &d);
2823       s -= d / gsl_vector_get(s_vec, j);
2824 
2825       gsl_matrix_set(qvar_mat, i, j, s);
2826     }
2827   }
2828 
2829   d = (double)(ni_test - 1);
2830   gsl_matrix_scale(qvar_mat, 2.0 / (d * d * d));
2831 
2832   // Calculate S^{-1}.
2833   gsl_matrix_memcpy(tmp_mat, S_mat);
2834   int sig;
2835   gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
2836   LUDecomp(tmp_mat, pmt, &sig);
2837   LUInvert(tmp_mat, pmt, Si_mat);
2838 
2839   // Calculate variance for the estimates.
2840   for (size_t i = 0; i < n_vc; i++) {
2841     for (size_t j = i; j < n_vc; j++) {
2842       d = gsl_matrix_get(Svar_mat, i, j);
2843       d *= v_pve[i] * v_pve[j];
2844 
2845       d += gsl_matrix_get(qvar_mat, i, j);
2846       gsl_matrix_set(Var_mat, i, j, d);
2847       if (i != j) {
2848         gsl_matrix_set(Var_mat, j, i, d);
2849       }
2850     }
2851   }
2852 
2853   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
2854                  tmp_mat);
2855   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
2856                  Var_mat);
2857 
2858   // Compute sigma2 per snp, enrich.
2859   v_sigma2.clear();
2860   v_enrich.clear();
2861   for (size_t i = 0; i < n_vc; i++) {
2862     v_sigma2.push_back(v_pve[i] / gsl_vector_get(s_vec, i));
2863     v_enrich.push_back(v_pve[i] / gsl_vector_get(s_vec, i) * s_snp / s_pve);
2864   }
2865 
2866   // Compute se_pve, se_sigma2.
2867   for (size_t i = 0; i < n_vc; i++) {
2868     d = sqrt(gsl_matrix_get(Var_mat, i, i));
2869     v_se_pve.push_back(d);
2870     v_se_sigma2.push_back(d / gsl_vector_get(s_vec, i));
2871   }
2872 
2873   // Compute pve_total, se_pve_total.
2874   pve_total = 0;
2875   for (size_t i = 0; i < n_vc; i++) {
2876     pve_total += v_pve[i];
2877   }
2878 
2879   se_pve_total = 0;
2880   for (size_t i = 0; i < n_vc; i++) {
2881     for (size_t j = 0; j < n_vc; j++) {
2882       se_pve_total += gsl_matrix_get(Var_mat, i, j);
2883     }
2884   }
2885   se_pve_total = sqrt(se_pve_total);
2886 
2887   // Compute se_enrich.
2888   gsl_matrix_set_identity(tmp_mat);
2889 
2890   double d1;
2891   for (size_t i = 0; i < n_vc; i++) {
2892     d = v_pve[i] / s_pve;
2893     d1 = gsl_vector_get(s_vec, i);
2894     for (size_t j = 0; j < n_vc; j++) {
2895       if (i == j) {
2896         gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
2897       } else {
2898         gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
2899       }
2900     }
2901   }
2902   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
2903                  tmp_mat1);
2904   gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
2905                  VarEnrich_mat);
2906 
2907   for (size_t i = 0; i < n_vc; i++) {
2908     d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
2909     v_se_enrich.push_back(d);
2910   }
2911 
2912   cout << "pve = ";
2913   for (size_t i = 0; i < n_vc; i++) {
2914     cout << v_pve[i] << " ";
2915   }
2916   cout << endl;
2917 
2918   cout << "se(pve) = ";
2919   for (size_t i = 0; i < n_vc; i++) {
2920     cout << v_se_pve[i] << " ";
2921   }
2922   cout << endl;
2923 
2924   cout << "sigma2 per snp = ";
2925   for (size_t i = 0; i < n_vc; i++) {
2926     cout << v_sigma2[i] << " ";
2927   }
2928   cout << endl;
2929 
2930   cout << "se(sigma2 per snp) = ";
2931   for (size_t i = 0; i < n_vc; i++) {
2932     cout << v_se_sigma2[i] << " ";
2933   }
2934   cout << endl;
2935 
2936   cout << "enrichment = ";
2937   for (size_t i = 0; i < n_vc; i++) {
2938     cout << v_enrich[i] << " ";
2939   }
2940   cout << endl;
2941 
2942   cout << "se(enrichment) = ";
2943   for (size_t i = 0; i < n_vc; i++) {
2944     cout << v_se_enrich[i] << " ";
2945   }
2946   cout << endl;
2947 
2948   // Delete matrices.
2949   gsl_matrix_free(Si_mat);
2950   gsl_matrix_free(Var_mat);
2951   gsl_matrix_free(VarEnrich_mat);
2952   gsl_matrix_free(tmp_mat);
2953   gsl_matrix_free(tmp_mat1);
2954   gsl_matrix_free(qvar_mat);
2955 
2956   gsl_vector_free(w_pve);
2957   gsl_vector_free(wz);
2958   gsl_vector_free(zwz);
2959   gsl_vector_free(WXtXWz);
2960   gsl_vector_free(Xz_pve);
2961 
2962   return;
2963 }
2964