1 /*
2 Genome-wide Efficient Mixed Model Association (GEMMA)
3 Copyright (C) 2011-2017, Xiang Zhou
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include <fstream>
20 #include <iostream>
21 #include <sstream>
22
23 #include <bitset>
24 #include <cmath>
25 #include <cstring>
26 #include <iomanip>
27 #include <iostream>
28 #include <map>
29 #include <set>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string>
33 #include <vector>
34
35 #include "gsl/gsl_blas.h"
36 #include "gsl/gsl_linalg.h"
37 #include "gsl/gsl_matrix.h"
38 #include "gsl/gsl_vector.h"
39
40 #include "gsl/gsl_cdf.h"
41 #include "gsl/gsl_min.h"
42 #include "gsl/gsl_multiroots.h"
43
44 // #include "Eigen/Dense"
45
46 #include "gzstream.h"
47 #include "gemma_io.h"
48 #include "lapack.h"
49 #include "lmm.h"
50 #include "mathfunc.h"
51 #include "param.h"
52 #include "vc.h"
53 #include "fastblas.h"
54
55 using namespace std;
56 // using namespace Eigen;
57
58 // In this file, X, Y are already transformed (i.e. UtX and UtY).
CopyFromParam(PARAM & cPar)59 void VC::CopyFromParam(PARAM &cPar) {
60 a_mode = cPar.a_mode;
61
62 file_cat = cPar.file_cat;
63 file_beta = cPar.file_beta;
64 file_cor = cPar.file_cor;
65
66 setSnps = cPar.setSnps;
67
68 file_out = cPar.file_out;
69 path_out = cPar.path_out;
70
71 time_UtX = 0.0;
72 time_opt = 0.0;
73
74 v_traceG = cPar.v_traceG;
75
76 ni_total = cPar.ni_total;
77 ns_total = cPar.ns_total;
78 ns_test = cPar.ns_test;
79
80 crt = cPar.crt;
81 window_cm = cPar.window_cm;
82 window_bp = cPar.window_bp;
83 window_ns = cPar.window_ns;
84
85 n_vc = cPar.n_vc;
86
87 return;
88 }
89
CopyToParam(PARAM & cPar)90 void VC::CopyToParam(PARAM &cPar) {
91 cPar.time_UtX = time_UtX;
92 cPar.time_opt = time_opt;
93
94 cPar.v_pve = v_pve;
95 cPar.v_se_pve = v_se_pve;
96 cPar.v_sigma2 = v_sigma2;
97 cPar.v_se_sigma2 = v_se_sigma2;
98 cPar.pve_total = pve_total;
99 cPar.se_pve_total = se_pve_total;
100 cPar.v_traceG = v_traceG;
101
102 cPar.v_beta = v_beta;
103 cPar.v_se_beta = v_se_beta;
104
105 cPar.ni_total = ni_total;
106 cPar.ns_total = ns_total;
107 cPar.ns_test = ns_test;
108
109 cPar.n_vc = n_vc;
110
111 return;
112 }
113
WriteFile_qs(const gsl_vector * s_vec,const gsl_vector * q_vec,const gsl_vector * qvar_vec,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat)114 void VC::WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
115 const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
116 const gsl_matrix *Svar_mat) {
117 string file_str;
118 file_str = path_out + "/" + file_out;
119 file_str += ".qvec.txt";
120
121 ofstream outfile_q(file_str.c_str(), ofstream::out);
122 if (!outfile_q) {
123 cout << "error writing file: " << file_str.c_str() << endl;
124 return;
125 }
126
127 for (size_t i = 0; i < s_vec->size; i++) {
128 outfile_q << gsl_vector_get(s_vec, i) << endl;
129 }
130 for (size_t i = 0; i < q_vec->size; i++) {
131 outfile_q << gsl_vector_get(q_vec, i) << endl;
132 }
133 for (size_t i = 0; i < qvar_vec->size; i++) {
134 outfile_q << gsl_vector_get(qvar_vec, i) << endl;
135 }
136
137 outfile_q.clear();
138 outfile_q.close();
139
140 file_str = path_out + "/" + file_out;
141 file_str += ".smat.txt";
142
143 ofstream outfile_s(file_str.c_str(), ofstream::out);
144 if (!outfile_s) {
145 cout << "error writing file: " << file_str.c_str() << endl;
146 return;
147 }
148
149 for (size_t i = 0; i < S_mat->size1; i++) {
150 for (size_t j = 0; j < S_mat->size2; j++) {
151 outfile_s << gsl_matrix_get(S_mat, i, j) << "\t";
152 }
153 outfile_s << endl;
154 }
155 for (size_t i = 0; i < Svar_mat->size1; i++) {
156 for (size_t j = 0; j < Svar_mat->size2; j++) {
157 outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t";
158 }
159 outfile_s << endl;
160 }
161
162 outfile_s.clear();
163 outfile_s.close();
164
165 return;
166 }
167
UpdateParam(const gsl_vector * log_sigma2,VC_PARAM * p)168 void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) {
169 size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2;
170
171 gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1);
172 gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt);
173 gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt);
174 gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt);
175 gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1);
176
177 double sigma2;
178
179 // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
180 gsl_matrix_set_zero(p->P);
181 for (size_t i = 0; i < n_vc + 1; i++) {
182 if (i == n_vc) {
183 gsl_matrix_set_identity(K_temp);
184 } else {
185 gsl_matrix_const_view K_sub =
186 gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
187 gsl_matrix_memcpy(K_temp, &K_sub.matrix);
188 }
189
190 // When unconstrained, update on sigma2 instead of log_sigma2.
191 if (p->noconstrain) {
192 sigma2 = gsl_vector_get(log_sigma2, i);
193 } else {
194 sigma2 = exp(gsl_vector_get(log_sigma2, i));
195 }
196 gsl_matrix_scale(K_temp, sigma2);
197 gsl_matrix_add(p->P, K_temp);
198 }
199
200 // Calculate H^{-1}.
201 fast_inverse(p->P);
202
203 fast_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW);
204 fast_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
205
206 fast_inverse(WtHiW);
207 gsl_matrix_memcpy(WtHiWi, WtHiW);
208
209 fast_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
210 fast_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
211
212 // Calculate Py, KPy, PKPy.
213 gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
214
215 double d;
216 for (size_t i = 0; i < n_vc + 1; i++) {
217 gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i);
218 gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i);
219
220 if (i == n_vc) {
221 gsl_vector_memcpy(&KPy.vector, p->Py);
222 } else {
223 gsl_matrix_const_view K_sub =
224 gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
225
226 // Seems to be important to use gsl dgemv here instead of
227 // fast_dgemv; otherwise.
228 gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
229 }
230
231 gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
232
233 // When phenotypes are not normalized well, then some values in
234 // the following matrix maybe NaN; change that to 0; this seems to
235 // only happen when fast_dgemv was used above.
236 for (size_t j = 0; j < p->KPy_mat->size1; j++) {
237 d = gsl_matrix_get(p->KPy_mat, j, i);
238 if (isnan(d)) {
239 gsl_matrix_set(p->KPy_mat, j, i, 0);
240 cout << "nan appears in " << i << " " << j << endl;
241 }
242 d = gsl_matrix_get(p->PKPy_mat, j, i);
243 if (isnan(d)) {
244 gsl_matrix_set(p->PKPy_mat, j, i, 0);
245 cout << "nan appears in " << i << " " << j << endl;
246 }
247 }
248 }
249
250 gsl_matrix_free(K_temp);
251 gsl_matrix_free(HiW);
252 gsl_matrix_free(WtHiW);
253 gsl_matrix_free(WtHiWi);
254 gsl_matrix_free(WtHiWiWtHi);
255
256 return;
257 }
258
259 // Below are functions for AI algorithm.
LogRL_dev1(const gsl_vector * log_sigma2,void * params,gsl_vector * dev1)260 int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
261 VC_PARAM *p = (VC_PARAM *)params;
262
263 size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
264
265 double tr, d;
266
267 // Update parameters.
268 UpdateParam(log_sigma2, p);
269
270 // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy.
271 for (size_t i = 0; i < n_vc + 1; i++) {
272 if (i == n_vc) {
273 tr = 0;
274 for (size_t l = 0; l < n1; l++) {
275 tr += gsl_matrix_get(p->P, l, l);
276 }
277 } else {
278 tr = 0;
279 for (size_t l = 0; l < n1; l++) {
280 gsl_vector_view P_row = gsl_matrix_row(p->P, l);
281 gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
282 gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
283 tr += d;
284 }
285 }
286
287 gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
288 gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
289
290 if (p->noconstrain) {
291 d = (-0.5 * tr + 0.5 * d);
292 } else {
293 d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i));
294 }
295
296 gsl_vector_set(dev1, i, d);
297 }
298
299 return GSL_SUCCESS;
300 }
301
LogRL_dev2(const gsl_vector * log_sigma2,void * params,gsl_matrix * dev2)302 int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
303 VC_PARAM *p = (VC_PARAM *)params;
304
305 size_t n_vc = log_sigma2->size - 1;
306
307 double d, sigma2_i, sigma2_j;
308
309 // Update parameters.
310 UpdateParam(log_sigma2, p);
311
312 // Calculate dev2 = 0.5(yPKPKPy).
313 for (size_t i = 0; i < n_vc + 1; i++) {
314 gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
315 if (p->noconstrain) {
316 sigma2_i = gsl_vector_get(log_sigma2, i);
317 } else {
318 sigma2_i = exp(gsl_vector_get(log_sigma2, i));
319 }
320
321 for (size_t j = i; j < n_vc + 1; j++) {
322 gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
323
324 gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
325 if (p->noconstrain) {
326 sigma2_j = gsl_vector_get(log_sigma2, j);
327 d *= -0.5;
328 } else {
329 sigma2_j = exp(gsl_vector_get(log_sigma2, j));
330 d *= -0.5 * sigma2_i * sigma2_j;
331 }
332
333 gsl_matrix_set(dev2, i, j, d);
334 if (j != i) {
335 gsl_matrix_set(dev2, j, i, d);
336 }
337 }
338 }
339
340 gsl_matrix_memcpy(p->Hessian, dev2);
341 return GSL_SUCCESS;
342 }
343
LogRL_dev12(const gsl_vector * log_sigma2,void * params,gsl_vector * dev1,gsl_matrix * dev2)344 int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1,
345 gsl_matrix *dev2) {
346 VC_PARAM *p = (VC_PARAM *)params;
347
348 size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
349
350 double tr, d, sigma2_i, sigma2_j;
351
352 // Update parameters.
353 UpdateParam(log_sigma2, p);
354
355 for (size_t i = 0; i < n_vc + 1; i++) {
356 if (i == n_vc) {
357 tr = 0;
358 for (size_t l = 0; l < n1; l++) {
359 tr += gsl_matrix_get(p->P, l, l);
360 }
361 } else {
362 tr = 0;
363 for (size_t l = 0; l < n1; l++) {
364 gsl_vector_view P_row = gsl_matrix_row(p->P, l);
365 gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
366 gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
367 tr += d;
368 }
369 }
370
371 gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
372 gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
373
374 if (p->noconstrain) {
375 sigma2_i = gsl_vector_get(log_sigma2, i);
376 d = (-0.5 * tr + 0.5 * d);
377 } else {
378 sigma2_i = exp(gsl_vector_get(log_sigma2, i));
379 d = (-0.5 * tr + 0.5 * d) * sigma2_i;
380 }
381
382 gsl_vector_set(dev1, i, d);
383
384 for (size_t j = i; j < n_vc + 1; j++) {
385 gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
386 gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
387
388 if (p->noconstrain) {
389 sigma2_j = gsl_vector_get(log_sigma2, j);
390 d *= -0.5;
391 } else {
392 sigma2_j = exp(gsl_vector_get(log_sigma2, j));
393 d *= -0.5 * sigma2_i * sigma2_j;
394 }
395
396 gsl_matrix_set(dev2, i, j, d);
397 if (j != i) {
398 gsl_matrix_set(dev2, j, i, d);
399 }
400 }
401 }
402
403 gsl_matrix_memcpy(p->Hessian, dev2);
404
405 return GSL_SUCCESS;
406 }
407
408 // Read header to determine which column contains which item.
ReadHeader_vc(const string & line,HEADER & header)409 bool ReadHeader_vc(const string &line, HEADER &header) {
410 debug_msg("entering");
411 string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps",
412 "SNPS", "snpid", "SNPID", "rsid", "RSID"};
413 set<string> rs_set(rs_ptr, rs_ptr + 10);
414 string chr_ptr[] = {"chr", "CHR"};
415 set<string> chr_set(chr_ptr, chr_ptr + 2);
416 string pos_ptr[] = {
417 "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
418 set<string> pos_set(pos_ptr, pos_ptr + 8);
419 string cm_ptr[] = {"cm", "CM"};
420 set<string> cm_set(cm_ptr, cm_ptr + 2);
421 string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"};
422 set<string> a1_set(a1_ptr, a1_ptr + 4);
423 string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"};
424 set<string> a0_set(a0_ptr, a0_ptr + 4);
425
426 string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
427 set<string> z_set(z_ptr, z_ptr + 6);
428 string beta_ptr[] = {"beta", "BETA", "b", "B"};
429 set<string> beta_set(beta_ptr, beta_ptr + 4);
430 string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
431 set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
432 string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
433 set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
434 string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
435 set<string> p_set(p_ptr, p_ptr + 6);
436
437 string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
438 set<string> n_set(n_ptr, n_ptr + 6);
439 string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
440 set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
441 string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
442 set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
443
444 string af_ptr[] = {"af",
445 "AF",
446 "maf",
447 "MAF",
448 "f",
449 "F",
450 "allele_freq",
451 "ALLELE_FREQ",
452 "allele_frequency",
453 "ALLELE_FREQUENCY"};
454 set<string> af_set(af_ptr, af_ptr + 10);
455 string var_ptr[] = {"var", "VAR"};
456 set<string> var_set(var_ptr, var_ptr + 2);
457
458 string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
459 set<string> ws_set(ws_ptr, ws_ptr + 4);
460 string cor_ptr[] = {"cor", "COR", "r", "R"};
461 set<string> cor_set(cor_ptr, cor_ptr + 4);
462
463 header.rs_col = 0;
464 header.chr_col = 0;
465 header.pos_col = 0;
466 header.a1_col = 0;
467 header.a0_col = 0;
468 header.z_col = 0;
469 header.beta_col = 0;
470 header.sebeta_col = 0;
471 header.chisq_col = 0;
472 header.p_col = 0;
473 header.n_col = 0;
474 header.nmis_col = 0;
475 header.nobs_col = 0;
476 header.af_col = 0;
477 header.var_col = 0;
478 header.ws_col = 0;
479 header.cor_col = 0;
480 header.coln = 0;
481
482 char *ch_ptr;
483 string type;
484 size_t n_error = 0;
485
486 ch_ptr = strtok((char *)line.c_str(), " , \t");
487 while (ch_ptr != NULL) {
488 type = ch_ptr;
489 if (rs_set.count(type) != 0) {
490 if (header.rs_col == 0) {
491 header.rs_col = header.coln + 1;
492 } else {
493 cout << "error! more than two rs columns in the file." << endl;
494 n_error++;
495 }
496 } else if (chr_set.count(type) != 0) {
497 if (header.chr_col == 0) {
498 header.chr_col = header.coln + 1;
499 } else {
500 cout << "error! more than two chr columns in the file." << endl;
501 n_error++;
502 }
503 } else if (pos_set.count(type) != 0) {
504 if (header.pos_col == 0) {
505 header.pos_col = header.coln + 1;
506 } else {
507 cout << "error! more than two pos columns in the file." << endl;
508 n_error++;
509 }
510 } else if (cm_set.count(type) != 0) {
511 if (header.cm_col == 0) {
512 header.cm_col = header.coln + 1;
513 } else {
514 cout << "error! more than two cm columns in the file." << endl;
515 n_error++;
516 }
517 } else if (a1_set.count(type) != 0) {
518 if (header.a1_col == 0) {
519 header.a1_col = header.coln + 1;
520 } else {
521 cout << "error! more than two allele1 columns in the file." << endl;
522 n_error++;
523 }
524 } else if (a0_set.count(type) != 0) {
525 if (header.a0_col == 0) {
526 header.a0_col = header.coln + 1;
527 } else {
528 cout << "error! more than two allele0 columns in the file." << endl;
529 n_error++;
530 }
531 } else if (z_set.count(type) != 0) {
532 if (header.z_col == 0) {
533 header.z_col = header.coln + 1;
534 } else {
535 cout << "error! more than two z columns in the file." << endl;
536 n_error++;
537 }
538 } else if (beta_set.count(type) != 0) {
539 if (header.beta_col == 0) {
540 header.beta_col = header.coln + 1;
541 } else {
542 cout << "error! more than two beta columns in the file." << endl;
543 n_error++;
544 }
545 } else if (sebeta_set.count(type) != 0) {
546 if (header.sebeta_col == 0) {
547 header.sebeta_col = header.coln + 1;
548 } else {
549 cout << "error! more than two se_beta columns in the file." << endl;
550 n_error++;
551 }
552 } else if (chisq_set.count(type) != 0) {
553 if (header.chisq_col == 0) {
554 header.chisq_col = header.coln + 1;
555 } else {
556 cout << "error! more than two z columns in the file." << endl;
557 n_error++;
558 }
559 } else if (p_set.count(type) != 0) {
560 if (header.p_col == 0) {
561 header.p_col = header.coln + 1;
562 } else {
563 cout << "error! more than two p columns in the file." << endl;
564 n_error++;
565 }
566 } else if (n_set.count(type) != 0) {
567 if (header.n_col == 0) {
568 header.n_col = header.coln + 1;
569 } else {
570 cout << "error! more than two n_total columns in the file." << endl;
571 n_error++;
572 }
573 } else if (nmis_set.count(type) != 0) {
574 if (header.nmis_col == 0) {
575 header.nmis_col = header.coln + 1;
576 } else {
577 cout << "error! more than two n_mis columns in the file." << endl;
578 n_error++;
579 }
580 } else if (nobs_set.count(type) != 0) {
581 if (header.nobs_col == 0) {
582 header.nobs_col = header.coln + 1;
583 } else {
584 cout << "error! more than two n_obs columns in the file." << endl;
585 n_error++;
586 }
587 } else if (ws_set.count(type) != 0) {
588 if (header.ws_col == 0) {
589 header.ws_col = header.coln + 1;
590 } else {
591 cout << "error! more than two window_size columns in the file." << endl;
592 n_error++;
593 }
594 } else if (af_set.count(type) != 0) {
595 if (header.af_col == 0) {
596 header.af_col = header.coln + 1;
597 } else {
598 cout << "error! more than two af columns in the file." << endl;
599 n_error++;
600 }
601 } else if (cor_set.count(type) != 0) {
602 if (header.cor_col == 0) {
603 header.cor_col = header.coln + 1;
604 } else {
605 cout << "error! more than two cor columns in the file." << endl;
606 n_error++;
607 }
608 } else {
609 }
610
611 ch_ptr = strtok(NULL, " , \t");
612 header.coln++;
613 }
614
615 if (header.cor_col != 0 && header.cor_col != header.coln) {
616 cout << "error! the cor column should be the last column." << endl;
617 n_error++;
618 }
619
620 if (header.rs_col == 0) {
621 if (header.chr_col != 0 && header.pos_col != 0) {
622 cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
623 } else {
624 cout << "error! missing an rs column." << endl;
625 n_error++;
626 }
627 }
628
629 if (n_error == 0) {
630 return true;
631 } else {
632 return false;
633 }
634 }
635
636 // Read cov file the first time, record mapRS2in, mapRS2var (in case
637 // var is not provided in the z file), store vec_n and vec_rs.
ReadFile_cor(const string & file_cor,const set<string> & setSnps,vector<string> & vec_rs,vector<size_t> & vec_n,vector<double> & vec_cm,vector<double> & vec_bp,map<string,size_t> & mapRS2in,map<string,double> & mapRS2var)638 void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
639 vector<string> &vec_rs, vector<size_t> &vec_n,
640 vector<double> &vec_cm, vector<double> &vec_bp,
641 map<string, size_t> &mapRS2in,
642 map<string, double> &mapRS2var) {
643 debug_msg("entering");
644 vec_rs.clear();
645 vec_n.clear();
646 mapRS2in.clear();
647 mapRS2var.clear();
648
649 igzstream infile(file_cor.c_str(), igzstream::in);
650 if (!infile) {
651 cout << "error! fail to open cov file: " << file_cor << endl;
652 return;
653 }
654
655 string line;
656 char *ch_ptr;
657
658 string rs, chr, a1, a0, pos, cm;
659 double af = 0, var_x = 0, d_pos, d_cm;
660 size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0;
661 size_t ns_test = 0, ns_total = 0;
662
663 HEADER header;
664
665 // Header.
666 safeGetline(infile, line).eof();
667 ReadHeader_vc(line, header);
668
669 if (header.n_col == 0) {
670 if (header.nobs_col == 0 && header.nmis_col == 0) {
671 cout << "error! missing sample size in the cor file." << endl;
672 } else {
673 cout << "total sample size will be replaced by obs/mis sample size."
674 << endl;
675 }
676 }
677
678 while (!safeGetline(infile, line).eof()) {
679
680 // do not read cor values this time; upto col_n-1.
681 ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
682
683 n_total = 0;
684 n_mis = 0;
685 n_obs = 0;
686 af = 0;
687 var_x = 0;
688 d_cm = 0;
689 d_pos = 0;
690 for (size_t i = 0; i < header.coln - 1; i++) {
691 enforce(ch_ptr);
692 if (header.rs_col != 0 && header.rs_col == i + 1) {
693 rs = ch_ptr;
694 }
695 if (header.chr_col != 0 && header.chr_col == i + 1) {
696 chr = ch_ptr;
697 }
698 if (header.pos_col != 0 && header.pos_col == i + 1) {
699 pos = ch_ptr;
700 d_pos = atof(ch_ptr);
701 }
702 if (header.cm_col != 0 && header.cm_col == i + 1) {
703 cm = ch_ptr;
704 d_cm = atof(ch_ptr);
705 }
706 if (header.a1_col != 0 && header.a1_col == i + 1) {
707 a1 = ch_ptr;
708 }
709 if (header.a0_col != 0 && header.a0_col == i + 1) {
710 a0 = ch_ptr;
711 }
712
713 if (header.n_col != 0 && header.n_col == i + 1) {
714 n_total = atoi(ch_ptr);
715 }
716 if (header.nmis_col != 0 && header.nmis_col == i + 1) {
717 n_mis = atoi(ch_ptr);
718 }
719 if (header.nobs_col != 0 && header.nobs_col == i + 1) {
720 n_obs = atoi(ch_ptr);
721 }
722
723 if (header.af_col != 0 && header.af_col == i + 1) {
724 af = atof(ch_ptr);
725 }
726 if (header.var_col != 0 && header.var_col == i + 1) {
727 var_x = atof(ch_ptr);
728 }
729
730 ch_ptr = strtok(NULL, " , \t");
731 }
732
733 if (header.rs_col == 0) {
734 rs = chr + ":" + pos;
735 }
736
737 if (header.n_col == 0) {
738 n_total = n_mis + n_obs;
739 }
740
741 // Record rs, n.
742 vec_rs.push_back(rs);
743 vec_n.push_back(n_total);
744 if (d_cm > 0) {
745 vec_cm.push_back(d_cm);
746 } else {
747 vec_cm.push_back(d_cm);
748 }
749 if (d_pos > 0) {
750 vec_bp.push_back(d_pos);
751 } else {
752 vec_bp.push_back(d_pos);
753 }
754
755 // Record mapRS2in and mapRS2var.
756 if (setSnps.size() == 0 || setSnps.count(rs) != 0) {
757 if (mapRS2in.count(rs) == 0) {
758 mapRS2in[rs] = 1;
759
760 if (header.var_col != 0) {
761 mapRS2var[rs] = var_x;
762 } else if (header.af_col != 0) {
763 var_x = 2.0 * af * (1.0 - af);
764 mapRS2var[rs] = var_x;
765 } else {
766 }
767
768 ns_test++;
769
770 } else {
771 cout << "error! more than one snp has the same id " << rs
772 << " in cor file?" << endl;
773 }
774 }
775
776 // Record max pos.
777 ni_total = max(ni_total, n_total);
778 ns_total++;
779 }
780
781 infile.close();
782 infile.clear();
783
784 return;
785 }
786
787 // Read beta file, store mapRS2var if var is provided here, calculate
788 // q and var_y.
ReadFile_beta(const bool flag_priorscale,const string & file_beta,const map<string,size_t> & mapRS2cat,map<string,size_t> & mapRS2in,map<string,double> & mapRS2var,map<string,size_t> & mapRS2nsamp,gsl_vector * q_vec,gsl_vector * qvar_vec,gsl_vector * s_vec,size_t & ni_total,size_t & ns_total)789 void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
790 const map<string, size_t> &mapRS2cat,
791 map<string, size_t> &mapRS2in,
792 map<string, double> &mapRS2var,
793 map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec,
794 gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total,
795 size_t &ns_total) {
796 debug_msg("entering");
797 mapRS2nsamp.clear();
798
799 igzstream infile(file_beta.c_str(), igzstream::in);
800 if (!infile) {
801 cout << "error! fail to open beta file: " << file_beta << endl;
802 return;
803 }
804
805 string line;
806 char *ch_ptr;
807 string type;
808
809 string rs, chr, a1, a0, pos, cm;
810 double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
811 af = 0, var_x = 0;
812 size_t n_total = 0, n_mis = 0, n_obs = 0;
813 size_t ns_test = 0;
814 ns_total = 0;
815 ni_total = 0;
816
817 vector<double> vec_q, vec_qvar, vec_s;
818 for (size_t i = 0; i < q_vec->size; i++) {
819 vec_q.push_back(0.0);
820 vec_qvar.push_back(0.0);
821 vec_s.push_back(0.0);
822 }
823
824 // Read header.
825 HEADER header;
826 safeGetline(infile, line).eof();
827 ReadHeader_vc(line, header);
828
829 if (header.n_col == 0) {
830 if (header.nobs_col == 0 && header.nmis_col == 0) {
831 cout << "error! missing sample size in the beta file." << endl;
832 } else {
833 cout << "total sample size will be replaced by obs/mis sample size."
834 << endl;
835 }
836 }
837
838 if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
839 header.chisq_col == 0 && header.p_col == 0) {
840 cout << "error! missing z scores in the beta file." << endl;
841 }
842
843 if (header.af_col == 0 && header.var_col == 0 && mapRS2var.size() == 0) {
844 cout << "error! missing allele frequency in the beta file." << endl;
845 }
846
847 while (!safeGetline(infile, line).eof()) {
848 ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
849
850 z = 0;
851 beta = 0;
852 se_beta = 0;
853 chisq = 0;
854 pvalue = 0;
855 n_total = 0;
856 n_mis = 0;
857 n_obs = 0;
858 af = 0;
859 var_x = 0;
860 for (size_t i = 0; i < header.coln; i++) {
861 enforce(ch_ptr);
862 if (header.rs_col != 0 && header.rs_col == i + 1) {
863 rs = ch_ptr;
864 }
865 if (header.chr_col != 0 && header.chr_col == i + 1) {
866 chr = ch_ptr;
867 }
868 if (header.pos_col != 0 && header.pos_col == i + 1) {
869 pos = ch_ptr;
870 }
871 if (header.cm_col != 0 && header.cm_col == i + 1) {
872 cm = ch_ptr;
873 }
874 if (header.a1_col != 0 && header.a1_col == i + 1) {
875 a1 = ch_ptr;
876 }
877 if (header.a0_col != 0 && header.a0_col == i + 1) {
878 a0 = ch_ptr;
879 }
880
881 if (header.z_col != 0 && header.z_col == i + 1) {
882 z = atof(ch_ptr);
883 }
884 if (header.beta_col != 0 && header.beta_col == i + 1) {
885 beta = atof(ch_ptr);
886 }
887 if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
888 se_beta = atof(ch_ptr);
889 }
890 if (header.chisq_col != 0 && header.chisq_col == i + 1) {
891 chisq = atof(ch_ptr);
892 }
893 if (header.p_col != 0 && header.p_col == i + 1) {
894 pvalue = atof(ch_ptr);
895 }
896
897 if (header.n_col != 0 && header.n_col == i + 1) {
898 n_total = atoi(ch_ptr);
899 }
900 if (header.nmis_col != 0 && header.nmis_col == i + 1) {
901 n_mis = atoi(ch_ptr);
902 }
903 if (header.nobs_col != 0 && header.nobs_col == i + 1) {
904 n_obs = atoi(ch_ptr);
905 }
906
907 if (header.af_col != 0 && header.af_col == i + 1) {
908 af = atof(ch_ptr);
909 }
910 if (header.var_col != 0 && header.var_col == i + 1) {
911 var_x = atof(ch_ptr);
912 }
913
914 ch_ptr = strtok(NULL, " , \t");
915 }
916
917 if (header.rs_col == 0) {
918 rs = chr + ":" + pos;
919 }
920
921 if (header.n_col == 0) {
922 n_total = n_mis + n_obs;
923 }
924
925 // Both z values and beta/se_beta have directions, while
926 // chisq/pvalue do not.
927 if (header.z_col != 0) {
928 zsquare = z * z;
929 } else if (header.beta_col != 0 && header.sebeta_col != 0) {
930 z = beta / se_beta;
931 zsquare = z * z;
932 } else if (header.chisq_col != 0) {
933 zsquare = chisq;
934 } else if (header.p_col != 0) {
935 zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
936 } else {
937 zsquare = 0;
938 }
939
940 // If the snp is also present in cor file, then do calculations.
941 if ((header.var_col != 0 || header.af_col != 0 ||
942 mapRS2var.count(rs) != 0) &&
943 mapRS2in.count(rs) != 0 &&
944 (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
945 if (mapRS2in.at(rs) > 1) {
946 cout << "error! more than one snp has the same id " << rs
947 << " in beta file?" << endl;
948 break;
949 }
950
951 if (header.var_col == 0) {
952 if (header.af_col != 0) {
953 var_x = 2.0 * af * (1.0 - af);
954 } else {
955 var_x = mapRS2var.at(rs);
956 }
957 }
958
959 if (flag_priorscale) {
960 var_x = 1;
961 }
962
963 mapRS2in[rs]++;
964 mapRS2var[rs] = var_x;
965 mapRS2nsamp[rs] = n_total;
966
967 if (mapRS2cat.size() != 0) {
968 vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total;
969 vec_s[mapRS2cat.at(rs)] += var_x;
970 vec_qvar[mapRS2cat.at(rs)] +=
971 var_x * var_x / ((double)n_total * (double)n_total);
972 } else {
973 vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total;
974 vec_s[0] += var_x;
975 vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total);
976 }
977
978 ni_total = max(ni_total, n_total);
979 ns_test++;
980 }
981
982 ns_total++;
983 }
984
985 for (size_t i = 0; i < q_vec->size; i++) {
986 gsl_vector_set(q_vec, i, vec_q[i]);
987 gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]);
988 gsl_vector_set(s_vec, i, vec_s[i]);
989 }
990
991 infile.clear();
992 infile.close();
993
994 return;
995 }
996
997 // Read covariance file the second time.
998 // Look for rs, n_mis+n_obs, var, window_size, cov.
999 // If window_cm/bp/ns is provided, then use these max values to
1000 // calibrate estimates.
ReadFile_cor(const string & file_cor,const vector<string> & vec_rs,const vector<size_t> & vec_n,const vector<double> & vec_cm,const vector<double> & vec_bp,const map<string,size_t> & mapRS2cat,const map<string,size_t> & mapRS2in,const map<string,double> & mapRS2var,const map<string,size_t> & mapRS2nsamp,const size_t crt,const double & window_cm,const double & window_bp,const double & window_ns,gsl_matrix * S_mat,gsl_matrix * Svar_mat,gsl_vector * qvar_vec,size_t & ni_total,size_t & ns_total,size_t & ns_test,size_t & ns_pair)1001 void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
1002 const vector<size_t> &vec_n, const vector<double> &vec_cm,
1003 const vector<double> &vec_bp,
1004 const map<string, size_t> &mapRS2cat,
1005 const map<string, size_t> &mapRS2in,
1006 const map<string, double> &mapRS2var,
1007 const map<string, size_t> &mapRS2nsamp, const size_t crt,
1008 const double &window_cm, const double &window_bp,
1009 const double &window_ns, gsl_matrix *S_mat,
1010 gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total,
1011 size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
1012 debug_msg("entering");
1013 igzstream infile(file_cor.c_str(), igzstream::in);
1014 if (!infile) {
1015 cout << "error! fail to open cov file: " << file_cor << endl;
1016 return;
1017 }
1018
1019 string line;
1020 char *ch_ptr;
1021
1022 string rs1, rs2;
1023 double d1, d2, d3, cor, var1, var2;
1024 size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin;
1025
1026 vector<vector<double>> mat_S, mat_Svar, mat_tmp;
1027 vector<double> vec_qvar, vec_tmp;
1028 vector<vector<vector<double>>> mat3d_Sbin;
1029
1030 for (size_t i = 0; i < S_mat->size1; i++) {
1031 vec_qvar.push_back(0.0);
1032 }
1033
1034 for (size_t i = 0; i < S_mat->size1; i++) {
1035 mat_S.push_back(vec_qvar);
1036 mat_Svar.push_back(vec_qvar);
1037 }
1038
1039 for (size_t k = 0; k < bin_size; k++) {
1040 vec_tmp.push_back(0.0);
1041 }
1042 for (size_t i = 0; i < S_mat->size1; i++) {
1043 mat_tmp.push_back(vec_tmp);
1044 }
1045 for (size_t i = 0; i < S_mat->size1; i++) {
1046 mat3d_Sbin.push_back(mat_tmp);
1047 }
1048
1049 string rs, chr, a1, a0, type, pos, cm;
1050 size_t n_total = 0, n_mis = 0, n_obs = 0;
1051 double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm;
1052 ns_test = 0;
1053 ns_total = 0;
1054 ns_pair = 0;
1055 ni_total = 0;
1056
1057 // Header.
1058 HEADER header;
1059
1060 safeGetline(infile, line).eof();
1061 ReadHeader_vc(line, header);
1062
1063 while (!safeGetline(infile, line).eof()) {
1064
1065 // Do not read cor values this time; upto col_n-1.
1066 d_pos1 = 0;
1067 d_cm1 = 0;
1068 ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
1069 for (size_t i = 0; i < header.coln - 1; i++) {
1070 enforce(ch_ptr);
1071 if (header.rs_col != 0 && header.rs_col == i + 1) {
1072 rs = ch_ptr;
1073 }
1074 if (header.chr_col != 0 && header.chr_col == i + 1) {
1075 chr = ch_ptr;
1076 }
1077 if (header.pos_col != 0 && header.pos_col == i + 1) {
1078 pos = ch_ptr;
1079 d_pos1 = atof(ch_ptr);
1080 }
1081 if (header.cm_col != 0 && header.cm_col == i + 1) {
1082 cm = ch_ptr;
1083 d_cm1 = atof(ch_ptr);
1084 }
1085 if (header.a1_col != 0 && header.a1_col == i + 1) {
1086 a1 = ch_ptr;
1087 }
1088 if (header.a0_col != 0 && header.a0_col == i + 1) {
1089 a0 = ch_ptr;
1090 }
1091
1092 if (header.n_col != 0 && header.n_col == i + 1) {
1093 n_total = atoi(ch_ptr);
1094 }
1095 if (header.nmis_col != 0 && header.nmis_col == i + 1) {
1096 n_mis = atoi(ch_ptr);
1097 }
1098 if (header.nobs_col != 0 && header.nobs_col == i + 1) {
1099 n_obs = atoi(ch_ptr);
1100 }
1101
1102 ch_ptr = strtok(NULL, " , \t");
1103 }
1104
1105 if (header.rs_col == 0) {
1106 rs = chr + ":" + pos;
1107 }
1108
1109 if (header.n_col == 0) {
1110 n_total = n_mis + n_obs;
1111 }
1112
1113 rs1 = rs;
1114
1115 if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs1) != 0) &&
1116 mapRS2in.count(rs1) != 0 && mapRS2in.at(rs1) == 2) {
1117 var1 = mapRS2var.at(rs1);
1118 nsamp1 = mapRS2nsamp.at(rs1);
1119 d2 = var1 * var1;
1120
1121 if (mapRS2cat.size() != 0) {
1122 mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
1123 (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1124 mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
1125 d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
1126 if (crt == 1) {
1127 mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)][0] +=
1128 (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1129 }
1130 } else {
1131 mat_S[0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1132 mat_Svar[0][0] +=
1133 d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
1134 if (crt == 1) {
1135 mat3d_Sbin[0][0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
1136 }
1137 }
1138
1139 n_nb = 0;
1140 while (ch_ptr != NULL) {
1141 type = ch_ptr;
1142 if (type.compare("NA") != 0 && type.compare("na") != 0 &&
1143 type.compare("nan") != 0 && type.compare("-nan") != 0) {
1144 cor = atof(ch_ptr);
1145 rs2 = vec_rs[ns_total + n_nb + 1];
1146 d_pos2 = vec_bp[ns_total + n_nb + 1];
1147 d_cm2 = vec_cm[ns_total + n_nb + 1];
1148 d_pos = abs(d_pos2 - d_pos1);
1149 d_cm = abs(d_cm2 - d_cm1);
1150
1151 if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs2) != 0) &&
1152 mapRS2in.count(rs2) != 0 && mapRS2in.at(rs2) == 2) {
1153 var2 = mapRS2var.at(rs2);
1154 nsamp2 = mapRS2nsamp.at(rs2);
1155 d1 = cor * cor -
1156 1.0 / (double)min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
1157 d2 = var1 * var2;
1158 d3 = cor * cor / ((double)nsamp1 * (double)nsamp2);
1159 n12 = min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
1160
1161 // Compute bin.
1162 if (crt == 1) {
1163 if (window_cm != 0 && d_cm1 != 0 && d_cm2 != 0) {
1164 bin =
1165 min((int)floor(d_cm / window_cm * bin_size), (int)bin_size);
1166 } else if (window_bp != 0 && d_pos1 != 0 && d_pos2 != 0) {
1167 bin = min((int)floor(d_pos / window_bp * bin_size),
1168 (int)bin_size);
1169 } else if (window_ns != 0) {
1170 bin = min((int)floor(((double)n_nb + 1) / window_ns * bin_size),
1171 (int)bin_size);
1172 }
1173 }
1174
1175 if (mapRS2cat.size() != 0) {
1176 if (mapRS2cat.at(rs1) == mapRS2cat.at(rs2)) {
1177 vec_qvar[mapRS2cat.at(rs1)] += 2 * d3 * d2;
1178 mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += 2 * d1 * d2;
1179 mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
1180 2 * d2 * d2 / ((double)n12 * (double)n12);
1181 if (crt == 1) {
1182 mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
1183 2 * d1 * d2;
1184 }
1185 } else {
1186 mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += d1 * d2;
1187 mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
1188 d2 * d2 / ((double)n12 * (double)n12);
1189 if (crt == 1) {
1190 mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
1191 d1 * d2;
1192 }
1193 }
1194 } else {
1195 vec_qvar[0] += 2 * d3 * d2;
1196 mat_S[0][0] += 2 * d1 * d2;
1197 mat_Svar[0][0] += 2 * d2 * d2 / ((double)n12 * (double)n12);
1198
1199 if (crt == 1) {
1200 mat3d_Sbin[0][0][bin] += 2 * d1 * d2;
1201 }
1202 }
1203 ns_pair++;
1204 }
1205 }
1206
1207 ch_ptr = strtok(NULL, " , \t");
1208 n_nb++;
1209 }
1210 ni_total = max(ni_total, n_total);
1211 ns_test++;
1212 }
1213
1214 ns_total++;
1215 }
1216
1217 // Use S_bin to fit a rational function y=1/(a+bx)^2, where
1218 // x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation
1219 // factor as a percentage.
1220 double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor;
1221 if (crt == 1) {
1222 for (size_t i = 0; i < S_mat->size1; i++) {
1223 for (size_t j = i; j < S_mat->size2; j++) {
1224
1225 // Correct mat_S.
1226 n = 0;
1227 var_y = 0;
1228 var_x = 0;
1229 mean_y = 0;
1230 mean_x = 0;
1231 cov_xy = 0;
1232 for (size_t k = 0; k < bin_size; k++) {
1233 if (j == i) {
1234 y = mat3d_Sbin[i][j][k];
1235 } else {
1236 y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k];
1237 }
1238 x = k + 0.5;
1239 cout << y << ", ";
1240 if (y > 0) {
1241 y = 1 / sqrt(y);
1242 mean_x += x;
1243 mean_y += y;
1244 var_x += x * x;
1245 var_y += y * y;
1246 cov_xy += x * y;
1247 n++;
1248 }
1249 }
1250 cout << endl;
1251
1252 if (n >= 5) {
1253 mean_x /= n;
1254 mean_y /= n;
1255 var_x /= n;
1256 var_y /= n;
1257 cov_xy /= n;
1258 var_x -= mean_x * mean_x;
1259 var_y -= mean_y * mean_y;
1260 cov_xy -= mean_x * mean_y;
1261 b = cov_xy / var_x;
1262 a = mean_y - b * mean_x;
1263 crt_factor = a / (b * (bin_size + 0.5)) + 1;
1264 if (i == j) {
1265 mat_S[i][j] *= crt_factor;
1266 } else {
1267 mat_S[i][j] *= crt_factor;
1268 mat_S[j][i] *= crt_factor;
1269 }
1270 cout << crt_factor << endl;
1271
1272 // Correct qvar.
1273 if (i == j) {
1274 vec_qvar[i] *= crt_factor;
1275 }
1276 }
1277 }
1278 }
1279 }
1280
1281 // Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat.
1282 for (size_t i = 0; i < S_mat->size1; i++) {
1283 d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i];
1284 gsl_vector_set(qvar_vec, i, d1);
1285 for (size_t j = 0; j < S_mat->size2; j++) {
1286 if (i == j) {
1287 gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
1288 gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test *
1289 ns_test / (2.0 * ns_pair));
1290 } else {
1291 gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]);
1292 gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) *
1293 ns_test * ns_test / (2.0 * ns_pair));
1294 }
1295 }
1296 }
1297
1298 infile.clear();
1299 infile.close();
1300
1301 return;
1302 }
1303
1304 // Use the new method to calculate variance components with summary
1305 // statistics first, use a function CalcS to compute S matrix (where
1306 // the diagonal elements are part of V(q) ), and then use bootstrap to
1307 // compute the variance for S, use a set of genotypes, phenotypes, and
1308 // individual ids, and snp category label.
CalcVCss(const gsl_matrix * Vq,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat,const gsl_vector * q_vec,const gsl_vector * s_vec,const double df,vector<double> & v_pve,vector<double> & v_se_pve,double & pve_total,double & se_pve_total,vector<double> & v_sigma2,vector<double> & v_se_sigma2,vector<double> & v_enrich,vector<double> & v_se_enrich)1309 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
1310 const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
1311 const gsl_vector *s_vec, const double df, vector<double> &v_pve,
1312 vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
1313 vector<double> &v_sigma2, vector<double> &v_se_sigma2,
1314 vector<double> &v_enrich, vector<double> &v_se_enrich) {
1315 size_t n_vc = S_mat->size1;
1316
1317 gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1318 gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1319 gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
1320 gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
1321 gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
1322 gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
1323
1324 gsl_vector *pve = gsl_vector_alloc(n_vc);
1325 gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1);
1326 gsl_vector *tmp = gsl_vector_alloc(n_vc + 1);
1327 gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc);
1328 gsl_vector *enrich = gsl_vector_alloc(n_vc);
1329 gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1330 gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc);
1331 gsl_vector *se_enrich = gsl_vector_alloc(n_vc);
1332
1333 double d;
1334
1335 // Calculate S^{-1}q.
1336 gsl_matrix_memcpy(tmp_mat, S_mat);
1337 int sig;
1338 gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1339 LUDecomp(tmp_mat, pmt, &sig);
1340 LUInvert(tmp_mat, pmt, Si_mat);
1341
1342 // Calculate sigma2snp and pve.
1343 gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
1344 gsl_vector_memcpy(sigma2persnp, pve);
1345 gsl_vector_div(sigma2persnp, s_vec);
1346
1347 // Get qvar_mat.
1348 gsl_matrix_memcpy(qvar_mat, Vq);
1349 gsl_matrix_scale(qvar_mat, 1.0 / (df * df));
1350
1351 // Calculate variance for these estimates.
1352 for (size_t i = 0; i < n_vc; i++) {
1353 for (size_t j = i; j < n_vc; j++) {
1354 d = gsl_matrix_get(Svar_mat, i, j);
1355 d *= gsl_vector_get(pve, i) * gsl_vector_get(pve, j);
1356
1357 d += gsl_matrix_get(qvar_mat, i, j);
1358 gsl_matrix_set(Var_mat, i, j, d);
1359 if (i != j) {
1360 gsl_matrix_set(Var_mat, j, i, d);
1361 }
1362 }
1363 }
1364
1365 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
1366 tmp_mat);
1367 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
1368 Var_mat);
1369
1370 for (size_t i = 0; i < n_vc; i++) {
1371 d = sqrt(gsl_matrix_get(Var_mat, i, i));
1372 gsl_vector_set(se_pve, i, d);
1373 d /= gsl_vector_get(s_vec, i);
1374 gsl_vector_set(se_sigma2persnp, i, d);
1375 }
1376
1377 // Compute pve_total, se_pve_total.
1378 pve_total = 0;
1379 se_pve_total = 0;
1380 for (size_t i = 0; i < n_vc; i++) {
1381 pve_total += gsl_vector_get(pve, i);
1382
1383 for (size_t j = 0; j < n_vc; j++) {
1384 se_pve_total += gsl_matrix_get(Var_mat, i, j);
1385 }
1386 }
1387 se_pve_total = sqrt(se_pve_total);
1388
1389 // Compute enrichment and its variance.
1390 double s_pve = 0, s_snp = 0;
1391 for (size_t i = 0; i < n_vc; i++) {
1392 s_pve += gsl_vector_get(pve, i);
1393 s_snp += gsl_vector_get(s_vec, i);
1394 }
1395 gsl_vector_memcpy(enrich, sigma2persnp);
1396 gsl_vector_scale(enrich, s_snp / s_pve);
1397
1398 gsl_matrix_set_identity(tmp_mat);
1399
1400 double d1;
1401 for (size_t i = 0; i < n_vc; i++) {
1402 d = gsl_vector_get(pve, i) / s_pve;
1403 d1 = gsl_vector_get(s_vec, i);
1404 for (size_t j = 0; j < n_vc; j++) {
1405 if (i == j) {
1406 gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
1407 } else {
1408 gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
1409 }
1410 }
1411 }
1412 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
1413 tmp_mat1);
1414 gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
1415 VarEnrich_mat);
1416
1417 for (size_t i = 0; i < n_vc; i++) {
1418 d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
1419 gsl_vector_set(se_enrich, i, d);
1420 }
1421
1422 cout << "pve = ";
1423 for (size_t i = 0; i < n_vc; i++) {
1424 cout << gsl_vector_get(pve, i) << " ";
1425 }
1426 cout << endl;
1427
1428 cout << "se(pve) = ";
1429 for (size_t i = 0; i < n_vc; i++) {
1430 cout << gsl_vector_get(se_pve, i) << " ";
1431 }
1432 cout << endl;
1433
1434 cout << "sigma2 per snp = ";
1435 for (size_t i = 0; i < n_vc; i++) {
1436 cout << gsl_vector_get(sigma2persnp, i) << " ";
1437 }
1438 cout << endl;
1439
1440 cout << "se(sigma2 per snp) = ";
1441 for (size_t i = 0; i < n_vc; i++) {
1442 cout << gsl_vector_get(se_sigma2persnp, i) << " ";
1443 }
1444 cout << endl;
1445
1446 cout << "enrichment = ";
1447 for (size_t i = 0; i < n_vc; i++) {
1448 cout << gsl_vector_get(enrich, i) << " ";
1449 }
1450 cout << endl;
1451
1452 cout << "se(enrichment) = ";
1453 for (size_t i = 0; i < n_vc; i++) {
1454 cout << gsl_vector_get(se_enrich, i) << " ";
1455 }
1456 cout << endl;
1457
1458 // Save data.
1459 v_pve.clear();
1460 v_se_pve.clear();
1461 v_sigma2.clear();
1462 v_se_sigma2.clear();
1463 v_enrich.clear();
1464 v_se_enrich.clear();
1465 for (size_t i = 0; i < n_vc; i++) {
1466 d = gsl_vector_get(pve, i);
1467 v_pve.push_back(d);
1468 d = gsl_vector_get(se_pve, i);
1469 v_se_pve.push_back(d);
1470
1471 d = gsl_vector_get(sigma2persnp, i);
1472 v_sigma2.push_back(d);
1473 d = gsl_vector_get(se_sigma2persnp, i);
1474 v_se_sigma2.push_back(d);
1475
1476 d = gsl_vector_get(enrich, i);
1477 v_enrich.push_back(d);
1478 d = gsl_vector_get(se_enrich, i);
1479 v_se_enrich.push_back(d);
1480 }
1481
1482 // Delete matrices.
1483 gsl_matrix_free(Si_mat);
1484 gsl_matrix_free(Var_mat);
1485 gsl_matrix_free(VarEnrich_mat);
1486 gsl_matrix_free(tmp_mat);
1487 gsl_matrix_free(tmp_mat1);
1488 gsl_matrix_free(qvar_mat);
1489
1490 gsl_vector_free(pve);
1491 gsl_vector_free(pve_plus);
1492 gsl_vector_free(tmp);
1493 gsl_vector_free(sigma2persnp);
1494 gsl_vector_free(enrich);
1495 gsl_vector_free(se_pve);
1496 gsl_vector_free(se_sigma2persnp);
1497 gsl_vector_free(se_enrich);
1498
1499 return;
1500 }
1501
1502 // Ks are not scaled.
CalcVChe(const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1503 void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W,
1504 const gsl_vector *y) {
1505 size_t n1 = K->size1, n2 = K->size2;
1506 size_t n_vc = n2 / n1;
1507
1508 double r = (double)n1 / (double)(n1 - W->size2);
1509 double var_y, var_y_new;
1510 double d, tr, s, v;
1511 vector<double> traceG_new;
1512
1513 // New matrices/vectors.
1514 gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
1515 gsl_vector *y_scale = gsl_vector_alloc(n1);
1516 gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc);
1517 gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1));
1518 gsl_vector *KKry = gsl_vector_alloc(n1);
1519
1520 // Old matrices/vectors.
1521 gsl_vector *pve = gsl_vector_alloc(n_vc);
1522 gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1523 gsl_vector *q_vec = gsl_vector_alloc(n_vc);
1524 gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
1525 gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
1526 gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
1527 gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1528 gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1529
1530 // Center and scale K by W.
1531 for (size_t i = 0; i < n_vc; i++) {
1532 gsl_matrix_view Kscale_sub =
1533 gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
1534 gsl_matrix_const_view K_sub =
1535 gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
1536 gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
1537
1538 CenterMatrix(&Kscale_sub.matrix, W);
1539 d = ScaleMatrix(&Kscale_sub.matrix);
1540 traceG_new.push_back(d);
1541 }
1542
1543 // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1).
1544 gsl_vector_memcpy(y_scale, y);
1545 CenterVector(y_scale, W);
1546
1547 var_y = VectorVar(y);
1548 var_y_new = VectorVar(y_scale);
1549
1550 StandardizeVector(y_scale);
1551
1552 // Compute Kry, which is used for confidence interval; also compute
1553 // q_vec (*n^2).
1554 for (size_t i = 0; i < n_vc; i++) {
1555 gsl_matrix_const_view Kscale_sub =
1556 gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
1557 gsl_vector_view Kry_col = gsl_matrix_column(Kry, i);
1558
1559 gsl_vector_memcpy(&Kry_col.vector, y_scale);
1560 gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0 * r,
1561 &Kry_col.vector);
1562
1563 gsl_blas_ddot(&Kry_col.vector, y_scale, &d);
1564 gsl_vector_set(q_vec, i, d);
1565 }
1566
1567 // Compute yKrKKry, which is used later for confidence interval.
1568 for (size_t i = 0; i < n_vc; i++) {
1569 gsl_vector_const_view Kry_coli = gsl_matrix_const_column(Kry, i);
1570 for (size_t j = i; j < n_vc; j++) {
1571 gsl_vector_const_view Kry_colj = gsl_matrix_const_column(Kry, j);
1572 for (size_t l = 0; l < n_vc; l++) {
1573 gsl_matrix_const_view Kscale_sub =
1574 gsl_matrix_const_submatrix(K_scale, 0, n1 * l, n1, n1);
1575 gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector,
1576 0.0, KKry);
1577 gsl_blas_ddot(&Kry_colj.vector, KKry, &d);
1578 gsl_matrix_set(yKrKKry, i, l * n_vc + j, d);
1579 if (i != j) {
1580 gsl_matrix_set(yKrKKry, j, l * n_vc + i, d);
1581 }
1582 }
1583 gsl_blas_ddot(&Kry_coli.vector, &Kry_colj.vector, &d);
1584 gsl_matrix_set(yKrKKry, i, n_vc * n_vc + j, d);
1585 if (i != j) {
1586 gsl_matrix_set(yKrKKry, j, n_vc * n_vc + i, d);
1587 }
1588 }
1589 }
1590
1591 // Compute Sij (*n^2).
1592 for (size_t i = 0; i < n_vc; i++) {
1593 for (size_t j = i; j < n_vc; j++) {
1594 tr = 0;
1595 for (size_t l = 0; l < n1; l++) {
1596 gsl_vector_const_view Ki_col =
1597 gsl_matrix_const_column(K_scale, i * n1 + l);
1598 gsl_vector_const_view Kj_col =
1599 gsl_matrix_const_column(K_scale, j * n1 + l);
1600 gsl_blas_ddot(&Ki_col.vector, &Kj_col.vector, &d);
1601 tr += d;
1602 }
1603
1604 tr = tr - r * (double)n1;
1605 gsl_matrix_set(S_mat, i, j, tr);
1606 if (i != j) {
1607 gsl_matrix_set(S_mat, j, i, tr);
1608 }
1609 }
1610 }
1611
1612 // Compute S^{-1}q.
1613 int sig;
1614 gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1615 LUDecomp(S_mat, pmt, &sig);
1616 LUInvert(S_mat, pmt, Si_mat);
1617
1618 // Compute pve (on the transformed scale).
1619 gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
1620
1621 // Compute q_var (*n^4).
1622 gsl_matrix_set_zero(qvar_mat);
1623 s = 1;
1624 for (size_t i = 0; i < n_vc; i++) {
1625 d = gsl_vector_get(pve, i);
1626 gsl_matrix_view yKrKKry_sub =
1627 gsl_matrix_submatrix(yKrKKry, 0, i * n_vc, n_vc, n_vc);
1628 gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
1629 gsl_matrix_scale(tmp_mat, d);
1630 gsl_matrix_add(qvar_mat, tmp_mat);
1631 s -= d;
1632 }
1633 gsl_matrix_view yKrKKry_sub =
1634 gsl_matrix_submatrix(yKrKKry, 0, n_vc * n_vc, n_vc, n_vc);
1635 gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
1636 gsl_matrix_scale(tmp_mat, s);
1637 gsl_matrix_add(qvar_mat, tmp_mat);
1638
1639 gsl_matrix_scale(qvar_mat, 2.0);
1640
1641 // Compute S^{-1}var_qS^{-1}.
1642 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0,
1643 tmp_mat);
1644 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
1645 Var_mat);
1646
1647 // Transform pve back to the original scale and save data.
1648 v_pve.clear();
1649 v_se_pve.clear();
1650 v_sigma2.clear();
1651 v_se_sigma2.clear();
1652
1653 s = 1.0, v = 0, pve_total = 0, se_pve_total = 0;
1654 for (size_t i = 0; i < n_vc; i++) {
1655 d = gsl_vector_get(pve, i);
1656 v_sigma2.push_back(d * var_y_new / traceG_new[i]);
1657 v_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
1658 s -= d;
1659 pve_total += d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y);
1660
1661 d = sqrt(gsl_matrix_get(Var_mat, i, i));
1662 v_se_sigma2.push_back(d * var_y_new / traceG_new[i]);
1663 v_se_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
1664
1665 for (size_t j = 0; j < n_vc; j++) {
1666 v += gsl_matrix_get(Var_mat, i, j);
1667 se_pve_total += gsl_matrix_get(Var_mat, i, j) *
1668 (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y) *
1669 (var_y_new / traceG_new[j]) * (v_traceG[j] / var_y);
1670 }
1671 }
1672 v_sigma2.push_back(s * r * var_y_new);
1673 v_se_sigma2.push_back(sqrt(v) * r * var_y_new);
1674 se_pve_total = sqrt(se_pve_total);
1675
1676 cout << "sigma2 = ";
1677 for (size_t i = 0; i < n_vc + 1; i++) {
1678 cout << v_sigma2[i] << " ";
1679 }
1680 cout << endl;
1681
1682 cout << "se(sigma2) = ";
1683 for (size_t i = 0; i < n_vc + 1; i++) {
1684 cout << v_se_sigma2[i] << " ";
1685 }
1686 cout << endl;
1687
1688 cout << "pve = ";
1689 for (size_t i = 0; i < n_vc; i++) {
1690 cout << v_pve[i] << " ";
1691 }
1692 cout << endl;
1693
1694 cout << "se(pve) = ";
1695 for (size_t i = 0; i < n_vc; i++) {
1696 cout << v_se_pve[i] << " ";
1697 }
1698 cout << endl;
1699
1700 if (n_vc > 1) {
1701 cout << "total pve = " << pve_total << endl;
1702 cout << "se(total pve) = " << se_pve_total << endl;
1703 }
1704
1705 gsl_permutation_free(pmt);
1706 gsl_matrix_free(K_scale);
1707 gsl_vector_free(y_scale);
1708 gsl_matrix_free(Kry);
1709 gsl_matrix_free(yKrKKry);
1710 gsl_vector_free(KKry);
1711
1712 // Old matrices/vectors.
1713 gsl_vector_free(pve);
1714 gsl_vector_free(se_pve);
1715 gsl_vector_free(q_vec);
1716 gsl_matrix_free(qvar_mat);
1717 gsl_matrix_free(tmp_mat);
1718 gsl_matrix_free(S_mat);
1719 gsl_matrix_free(Si_mat);
1720 gsl_matrix_free(Var_mat);
1721
1722 return;
1723 }
1724
1725 // REML for log(sigma2) based on the AI algorithm.
CalcVCreml(bool noconstrain,const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1726 void VC::CalcVCreml(bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
1727 const gsl_vector *y) {
1728 size_t n1 = K->size1, n2 = K->size2;
1729 size_t n_vc = n2 / n1;
1730 gsl_vector *log_sigma2 = gsl_vector_alloc(n_vc + 1);
1731 double d, s;
1732
1733 // Set up params.
1734 gsl_matrix *P = gsl_matrix_alloc(n1, n1);
1735 gsl_vector *Py = gsl_vector_alloc(n1);
1736 gsl_matrix *KPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
1737 gsl_matrix *PKPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
1738 gsl_vector *dev1 = gsl_vector_alloc(n_vc + 1);
1739 gsl_matrix *dev2 = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
1740 gsl_matrix *Hessian = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
1741 VC_PARAM params = {K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
1742
1743 // Initialize sigma2/log_sigma2.
1744 CalcVChe(K, W, y);
1745
1746 gsl_blas_ddot(y, y, &s);
1747 s /= (double)n1;
1748 for (size_t i = 0; i < n_vc + 1; i++) {
1749 if (noconstrain) {
1750 d = v_sigma2[i];
1751 } else {
1752 if (v_sigma2[i] <= 0) {
1753 d = log(0.1);
1754 } else {
1755 d = log(v_sigma2[i]);
1756 }
1757 }
1758 gsl_vector_set(log_sigma2, i, d);
1759 }
1760
1761 cout << "iteration " << 0 << endl;
1762 cout << "sigma2 = ";
1763 for (size_t i = 0; i < n_vc + 1; i++) {
1764 if (noconstrain) {
1765 cout << gsl_vector_get(log_sigma2, i) << " ";
1766 } else {
1767 cout << exp(gsl_vector_get(log_sigma2, i)) << " ";
1768 }
1769 }
1770 cout << endl;
1771
1772 // Set up fdf.
1773 gsl_multiroot_function_fdf FDF;
1774 FDF.n = n_vc + 1;
1775 FDF.params = ¶ms;
1776 FDF.f = &LogRL_dev1;
1777 FDF.df = &LogRL_dev2;
1778 FDF.fdf = &LogRL_dev12;
1779
1780 // Set up solver.
1781 int status;
1782 int iter = 0, max_iter = 100;
1783
1784 const gsl_multiroot_fdfsolver_type *T_fdf;
1785 gsl_multiroot_fdfsolver *s_fdf;
1786 T_fdf = gsl_multiroot_fdfsolver_hybridsj;
1787 s_fdf = gsl_multiroot_fdfsolver_alloc(T_fdf, n_vc + 1);
1788
1789 gsl_multiroot_fdfsolver_set(s_fdf, &FDF, log_sigma2);
1790
1791 do {
1792 iter++;
1793 status = gsl_multiroot_fdfsolver_iterate(s_fdf);
1794
1795 if (status)
1796 break;
1797
1798 cout << "iteration " << iter << endl;
1799 cout << "sigma2 = ";
1800 for (size_t i = 0; i < n_vc + 1; i++) {
1801 if (noconstrain) {
1802 cout << gsl_vector_get(s_fdf->x, i) << " ";
1803 } else {
1804 cout << exp(gsl_vector_get(s_fdf->x, i)) << " ";
1805 }
1806 }
1807 cout << endl;
1808 status = gsl_multiroot_test_residual(s_fdf->f, 1e-3);
1809 } while (status == GSL_CONTINUE && iter < max_iter);
1810
1811 // Obtain Hessian and Hessian inverse.
1812 int sig = LogRL_dev12(s_fdf->x, ¶ms, dev1, dev2);
1813
1814 gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1);
1815 LUDecomp(dev2, pmt, &sig);
1816 LUInvert(dev2, pmt, Hessian);
1817 gsl_permutation_free(pmt);
1818
1819 // Save sigma2 and se_sigma2.
1820 v_sigma2.clear();
1821 v_se_sigma2.clear();
1822 for (size_t i = 0; i < n_vc + 1; i++) {
1823 if (noconstrain) {
1824 d = gsl_vector_get(s_fdf->x, i);
1825 } else {
1826 d = exp(gsl_vector_get(s_fdf->x, i));
1827 }
1828 v_sigma2.push_back(d);
1829
1830 if (noconstrain) {
1831 d = -1.0 * gsl_matrix_get(Hessian, i, i);
1832 } else {
1833 d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i);
1834 }
1835 v_se_sigma2.push_back(sqrt(d));
1836 }
1837
1838 s = 0;
1839 for (size_t i = 0; i < n_vc; i++) {
1840 s += v_traceG[i] * v_sigma2[i];
1841 }
1842 s += v_sigma2[n_vc];
1843
1844 // Compute pve.
1845 v_pve.clear();
1846 pve_total = 0;
1847 for (size_t i = 0; i < n_vc; i++) {
1848 d = v_traceG[i] * v_sigma2[i] / s;
1849 v_pve.push_back(d);
1850 pve_total += d;
1851 }
1852
1853 // Compute se_pve; k=n_vc+1: total.
1854 double d1, d2;
1855 v_se_pve.clear();
1856 se_pve_total = 0;
1857 for (size_t k = 0; k < n_vc + 1; k++) {
1858 d = 0;
1859 for (size_t i = 0; i < n_vc + 1; i++) {
1860 if (noconstrain) {
1861 d1 = gsl_vector_get(s_fdf->x, i);
1862 d1 = 1;
1863 } else {
1864 d1 = exp(gsl_vector_get(s_fdf->x, i));
1865 }
1866
1867 if (k < n_vc) {
1868 if (i == k) {
1869 d1 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
1870 } else if (i == n_vc) {
1871 d1 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
1872 } else {
1873 d1 *= -1 * v_traceG[i] * v_traceG[k] * v_sigma2[k] / (s * s);
1874 }
1875 } else {
1876 if (i == k) {
1877 d1 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
1878 } else {
1879 d1 *= v_traceG[i] * v_sigma2[n_vc] / (s * s);
1880 }
1881 }
1882
1883 for (size_t j = 0; j < n_vc + 1; j++) {
1884 if (noconstrain) {
1885 d2 = gsl_vector_get(s_fdf->x, j);
1886 d2 = 1;
1887 } else {
1888 d2 = exp(gsl_vector_get(s_fdf->x, j));
1889 }
1890
1891 if (k < n_vc) {
1892 if (j == k) {
1893 d2 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
1894 } else if (j == n_vc) {
1895 d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
1896 } else {
1897 d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s);
1898 }
1899 } else {
1900 if (j == k) {
1901 d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
1902 } else {
1903 d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s);
1904 }
1905 }
1906
1907 d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j);
1908 }
1909 }
1910
1911 if (k < n_vc) {
1912 v_se_pve.push_back(sqrt(d));
1913 } else {
1914 se_pve_total = sqrt(d);
1915 }
1916 }
1917
1918 gsl_multiroot_fdfsolver_free(s_fdf);
1919
1920 gsl_vector_free(log_sigma2);
1921 gsl_matrix_free(P);
1922 gsl_vector_free(Py);
1923 gsl_matrix_free(KPy_mat);
1924 gsl_matrix_free(PKPy_mat);
1925 gsl_vector_free(dev1);
1926 gsl_matrix_free(dev2);
1927 gsl_matrix_free(Hessian);
1928
1929 return;
1930 }
1931
1932 // Ks are not scaled.
CalcVCacl(const gsl_matrix * K,const gsl_matrix * W,const gsl_vector * y)1933 void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
1934 const gsl_vector *y) {
1935 size_t n1 = K->size1, n2 = K->size2;
1936 size_t n_vc = n2 / n1;
1937
1938 double d, y2_sum;
1939
1940 // New matrices/vectors.
1941 gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
1942 gsl_vector *y_scale = gsl_vector_alloc(n1);
1943 gsl_vector *y2 = gsl_vector_alloc(n1);
1944 gsl_vector *n1_vec = gsl_vector_alloc(n1);
1945 gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc);
1946 gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc);
1947 gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1);
1948 gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1);
1949
1950 // Old matrices/vectors.
1951 gsl_vector *pve = gsl_vector_alloc(n_vc);
1952 gsl_vector *se_pve = gsl_vector_alloc(n_vc);
1953 gsl_vector *q_vec = gsl_vector_alloc(n_vc);
1954 gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc);
1955 gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc);
1956 gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
1957 gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
1958 gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc);
1959 gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
1960
1961 int sig;
1962 gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
1963
1964 // Center and scale K by W, and standardize K further so that all
1965 // diagonal elements are 1
1966 for (size_t i = 0; i < n_vc; i++) {
1967 gsl_matrix_view Kscale_sub =
1968 gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
1969 gsl_matrix_const_view K_sub =
1970 gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
1971 gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
1972
1973 CenterMatrix(&Kscale_sub.matrix, W);
1974 StandardizeMatrix(&Kscale_sub.matrix);
1975 }
1976
1977 // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
1978 gsl_vector_memcpy(y_scale, y);
1979 CenterVector(y_scale, W);
1980
1981 // Compute y^2 and sum(y^2), which is also the variance of y*n1.
1982 gsl_vector_memcpy(y2, y_scale);
1983 gsl_vector_mul(y2, y_scale);
1984
1985 y2_sum = 0;
1986 for (size_t i = 0; i < y2->size; i++) {
1987 y2_sum += gsl_vector_get(y2, i);
1988 }
1989
1990 // Compute the n_vc size q vector.
1991 for (size_t i = 0; i < n_vc; i++) {
1992 gsl_matrix_const_view Kscale_sub =
1993 gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
1994
1995 gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
1996
1997 gsl_blas_ddot(n1_vec, y_scale, &d);
1998 gsl_vector_set(q_vec, i, d - y2_sum);
1999 }
2000
2001 // Compute the n_vc by n_vc S1 and S2 matrix (and eventually
2002 // S=S1-\tau^{-1}S2).
2003 for (size_t i = 0; i < n_vc; i++) {
2004 gsl_matrix_const_view Kscale_sub1 =
2005 gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
2006
2007 for (size_t j = i; j < n_vc; j++) {
2008 gsl_matrix_const_view Kscale_sub2 =
2009 gsl_matrix_const_submatrix(K_scale, 0, n1 * j, n1, n1);
2010
2011 gsl_matrix_memcpy(K_tmp, &Kscale_sub1.matrix);
2012 gsl_matrix_mul_elements(K_tmp, &Kscale_sub2.matrix);
2013
2014 gsl_vector_set_zero(n1_vec);
2015 for (size_t t = 0; t < K_tmp->size1; t++) {
2016 gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t);
2017 gsl_vector_add(n1_vec, &Ktmp_col.vector);
2018 }
2019 gsl_vector_add_constant(n1_vec, -1.0);
2020
2021 // Compute S1.
2022 gsl_blas_ddot(n1_vec, y2, &d);
2023 gsl_matrix_set(S1, i, j, 2 * d);
2024 if (i != j) {
2025 gsl_matrix_set(S1, j, i, 2 * d);
2026 }
2027
2028 // Compute S2.
2029 d = 0;
2030 for (size_t t = 0; t < n1_vec->size; t++) {
2031 d += gsl_vector_get(n1_vec, t);
2032 }
2033 gsl_matrix_set(S2, i, j, d);
2034 if (i != j) {
2035 gsl_matrix_set(S2, j, i, d);
2036 }
2037
2038 // Save information to compute J.
2039 gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j);
2040 gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i);
2041
2042 gsl_vector_memcpy(&K2col1.vector, n1_vec);
2043 if (i != j) {
2044 gsl_vector_memcpy(&K2col2.vector, n1_vec);
2045 }
2046 }
2047 }
2048
2049 // Iterate to solve tau and h's.
2050 size_t it = 0;
2051 double s = 1;
2052 double tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
2053 while (abs(s) > 1e-3 && it < 100) {
2054
2055 // Update tau_inv.
2056 gsl_blas_ddot(q_vec, pve, &d);
2057 if (it > 0) {
2058 s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv;
2059 }
2060 tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
2061 if (it > 0) {
2062 s /= tau_inv;
2063 }
2064
2065 // Update S.
2066 gsl_matrix_memcpy(S_mat, S2);
2067 gsl_matrix_scale(S_mat, -1 * tau_inv);
2068 gsl_matrix_add(S_mat, S1);
2069
2070 // Update h=S^{-1}q.
2071 int sig;
2072 gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
2073 LUDecomp(S_mat, pmt, &sig);
2074 LUInvert(S_mat, pmt, Si_mat);
2075 gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
2076
2077 it++;
2078 }
2079
2080 // Compute V matrix and A matrix (K_scale is destroyed, so need to
2081 // compute V first).
2082 gsl_matrix_set_zero(V_mat);
2083 for (size_t i = 0; i < n_vc; i++) {
2084 gsl_matrix_view Kscale_sub =
2085 gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
2086
2087 // Compute V.
2088 gsl_matrix_memcpy(K_tmp, &Kscale_sub.matrix);
2089 gsl_matrix_scale(K_tmp, gsl_vector_get(pve, i));
2090 gsl_matrix_add(V_mat, K_tmp);
2091
2092 // Compute A; the corresponding Kscale is destroyed.
2093 gsl_matrix_const_view K2_sub =
2094 gsl_matrix_const_submatrix(K2, 0, n_vc * i, n1, n_vc);
2095 gsl_blas_dgemv(CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
2096
2097 for (size_t t = 0; t < n1; t++) {
2098 gsl_matrix_set(K_scale, t, n1 * i + t, gsl_vector_get(n1_vec, t));
2099 }
2100
2101 // Compute Ay.
2102 gsl_vector_view Ay_col = gsl_matrix_column(Ay, i);
2103 gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0,
2104 &Ay_col.vector);
2105 }
2106 gsl_matrix_scale(V_mat, tau_inv);
2107
2108 // Compute J matrix.
2109 for (size_t i = 0; i < n_vc; i++) {
2110 gsl_vector_view Ay_col1 = gsl_matrix_column(Ay, i);
2111 gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
2112
2113 for (size_t j = i; j < n_vc; j++) {
2114 gsl_vector_view Ay_col2 = gsl_matrix_column(Ay, j);
2115
2116 gsl_blas_ddot(&Ay_col2.vector, n1_vec, &d);
2117 gsl_matrix_set(J_mat, i, j, 2.0 * d);
2118 if (i != j) {
2119 gsl_matrix_set(J_mat, j, i, 2.0 * d);
2120 }
2121 }
2122 }
2123
2124 // Compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is
2125 // stored in Var_mat.
2126 gsl_matrix_memcpy(S_mat, S2);
2127 gsl_matrix_scale(S_mat, tau_inv);
2128
2129 LUDecomp(S_mat, pmt, &sig);
2130 LUInvert(S_mat, pmt, Si_mat);
2131
2132 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
2133 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
2134
2135 // Compute variance for tau_inv.
2136 gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
2137 gsl_blas_ddot(y_scale, n1_vec, &d);
2138 // auto se_tau_inv = sqrt(2 * d) / (double)n1; UNUSED
2139
2140 // Transform pve back to the original scale and save data.
2141 v_pve.clear();
2142 v_se_pve.clear();
2143 v_sigma2.clear();
2144 v_se_sigma2.clear();
2145
2146 pve_total = 0, se_pve_total = 0;
2147 for (size_t i = 0; i < n_vc; i++) {
2148 d = gsl_vector_get(pve, i);
2149 pve_total += d;
2150
2151 v_pve.push_back(d);
2152 v_sigma2.push_back(d * tau_inv / v_traceG[i]);
2153
2154 d = sqrt(gsl_matrix_get(Var_mat, i, i));
2155 v_se_pve.push_back(d);
2156 v_se_sigma2.push_back(d * tau_inv / v_traceG[i]);
2157
2158 for (size_t j = 0; j < n_vc; j++) {
2159 se_pve_total += gsl_matrix_get(Var_mat, i, j);
2160 }
2161 }
2162 v_sigma2.push_back((1 - pve_total) * tau_inv);
2163 v_se_sigma2.push_back(sqrt(se_pve_total) * tau_inv);
2164 se_pve_total = sqrt(se_pve_total);
2165
2166 cout << "sigma2 = ";
2167 for (size_t i = 0; i < n_vc + 1; i++) {
2168 cout << v_sigma2[i] << " ";
2169 }
2170 cout << endl;
2171
2172 cout << "se(sigma2) = ";
2173 for (size_t i = 0; i < n_vc + 1; i++) {
2174 cout << v_se_sigma2[i] << " ";
2175 }
2176 cout << endl;
2177
2178 cout << "pve = ";
2179 for (size_t i = 0; i < n_vc; i++) {
2180 cout << v_pve[i] << " ";
2181 }
2182 cout << endl;
2183
2184 cout << "se(pve) = ";
2185 for (size_t i = 0; i < n_vc; i++) {
2186 cout << v_se_pve[i] << " ";
2187 }
2188 cout << endl;
2189
2190 if (n_vc > 1) {
2191 cout << "total pve = " << pve_total << endl;
2192 cout << "se(total pve) = " << se_pve_total << endl;
2193 }
2194
2195 gsl_permutation_free(pmt);
2196
2197 gsl_matrix_free(K_scale);
2198 gsl_vector_free(y_scale);
2199 gsl_vector_free(y2);
2200 gsl_vector_free(n1_vec);
2201 gsl_matrix_free(Ay);
2202 gsl_matrix_free(K2);
2203 gsl_matrix_free(K_tmp);
2204 gsl_matrix_free(V_mat);
2205
2206 gsl_vector_free(pve);
2207 gsl_vector_free(se_pve);
2208 gsl_vector_free(q_vec);
2209 gsl_matrix_free(S1);
2210 gsl_matrix_free(S2);
2211 gsl_matrix_free(S_mat);
2212 gsl_matrix_free(Si_mat);
2213 gsl_matrix_free(J_mat);
2214 gsl_matrix_free(Var_mat);
2215
2216 return;
2217 }
2218
2219 // Read bimbam mean genotype file and compute XWz.
BimbamXwz(const string & file_geno,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,size_t ns_test,gsl_matrix * XWz)2220 bool BimbamXwz(const string &file_geno, const int display_pace,
2221 vector<int> &indicator_idv, vector<int> &indicator_snp,
2222 const vector<size_t> &vec_cat, const gsl_vector *w,
2223 const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
2224 debug_msg("entering");
2225 igzstream infile(file_geno.c_str(), igzstream::in);
2226 if (!infile) {
2227 cout << "error reading genotype file:" << file_geno << endl;
2228 return false;
2229 }
2230
2231 string line;
2232 char *ch_ptr;
2233
2234 size_t n_miss;
2235 double d, geno_mean, geno_var;
2236
2237 size_t ni_test = XWz->size1;
2238 gsl_vector *geno = gsl_vector_alloc(ni_test);
2239 gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
2240 gsl_vector *wz = gsl_vector_alloc(w->size);
2241 gsl_vector_memcpy(wz, z);
2242 gsl_vector_mul(wz, w);
2243
2244 for (size_t t = 0; t < indicator_snp.size(); ++t) {
2245 safeGetline(infile, line).eof();
2246 if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2247 ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
2248 }
2249 if (indicator_snp[t] == 0) {
2250 continue;
2251 }
2252
2253 ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
2254 ch_ptr = strtok_safe(NULL, " , \t");
2255 ch_ptr = strtok_safe(NULL, " , \t");
2256
2257 geno_mean = 0.0;
2258 n_miss = 0;
2259 geno_var = 0.0;
2260 gsl_vector_set_all(geno_miss, 0);
2261
2262 size_t j = 0;
2263 for (size_t i = 0; i < indicator_idv.size(); ++i) {
2264 if (indicator_idv[i] == 0) {
2265 continue;
2266 }
2267 ch_ptr = strtok_safe(NULL, " , \t");
2268 if (strcmp(ch_ptr, "NA") == 0) {
2269 gsl_vector_set(geno_miss, i, 0);
2270 n_miss++;
2271 } else {
2272 d = atof(ch_ptr);
2273 gsl_vector_set(geno, j, d);
2274 gsl_vector_set(geno_miss, j, 1);
2275 geno_mean += d;
2276 geno_var += d * d;
2277 }
2278 j++;
2279 }
2280
2281 geno_mean /= (double)(ni_test - n_miss);
2282 geno_var += geno_mean * geno_mean * (double)n_miss;
2283 geno_var /= (double)ni_test;
2284 geno_var -= geno_mean * geno_mean;
2285
2286 for (size_t i = 0; i < ni_test; ++i) {
2287 if (gsl_vector_get(geno_miss, i) == 0) {
2288 gsl_vector_set(geno, i, geno_mean);
2289 }
2290 }
2291
2292 gsl_vector_add_constant(geno, -1.0 * geno_mean);
2293
2294 gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
2295 d = gsl_vector_get(wz, ns_test);
2296 gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
2297
2298 ns_test++;
2299 }
2300
2301 cout << endl;
2302
2303 gsl_vector_free(geno);
2304 gsl_vector_free(geno_miss);
2305 gsl_vector_free(wz);
2306
2307 infile.close();
2308 infile.clear();
2309
2310 return true;
2311 }
2312
2313 // Read PLINK bed file and compute XWz.
PlinkXwz(const string & file_bed,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,size_t ns_test,gsl_matrix * XWz)2314 bool PlinkXwz(const string &file_bed, const int display_pace,
2315 vector<int> &indicator_idv, vector<int> &indicator_snp,
2316 const vector<size_t> &vec_cat, const gsl_vector *w,
2317 const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
2318 debug_msg("entering");
2319 ifstream infile(file_bed.c_str(), ios::binary);
2320 if (!infile) {
2321 cout << "error reading bed file:" << file_bed << endl;
2322 return false;
2323 }
2324
2325 char ch[1];
2326 bitset<8> b;
2327
2328 size_t n_miss, ci_total, ci_test;
2329 double d, geno_mean, geno_var;
2330
2331 size_t ni_test = XWz->size1;
2332 size_t ni_total = indicator_idv.size();
2333 gsl_vector *geno = gsl_vector_alloc(ni_test);
2334 gsl_vector *wz = gsl_vector_alloc(w->size);
2335 gsl_vector_memcpy(wz, z);
2336 gsl_vector_mul(wz, w);
2337
2338 int n_bit;
2339
2340 // Calculate n_bit and c, the number of bit for each snp.
2341 if (ni_total % 4 == 0) {
2342 n_bit = ni_total / 4;
2343 } else {
2344 n_bit = ni_total / 4 + 1;
2345 }
2346
2347 // Print the first three magic numbers.
2348 for (int i = 0; i < 3; ++i) {
2349 infile.read(ch, 1);
2350 b = ch[0];
2351 }
2352
2353 for (size_t t = 0; t < indicator_snp.size(); ++t) {
2354 if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2355 ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
2356 }
2357 if (indicator_snp[t] == 0) {
2358 continue;
2359 }
2360
2361 // n_bit, and 3 is the number of magic numbers.
2362 infile.seekg(t * n_bit + 3);
2363
2364 // Read genotypes.
2365 geno_mean = 0.0;
2366 n_miss = 0;
2367 ci_total = 0;
2368 geno_var = 0.0;
2369 ci_test = 0;
2370 for (int i = 0; i < n_bit; ++i) {
2371 infile.read(ch, 1);
2372 b = ch[0];
2373
2374 // Minor allele homozygous: 2.0; major: 0.0.
2375 for (size_t j = 0; j < 4; ++j) {
2376 if ((i == (n_bit - 1)) && ci_total == ni_total) {
2377 break;
2378 }
2379 if (indicator_idv[ci_total] == 0) {
2380 ci_total++;
2381 continue;
2382 }
2383
2384 if (b[2 * j] == 0) {
2385 if (b[2 * j + 1] == 0) {
2386 gsl_vector_set(geno, ci_test, 2.0);
2387 geno_mean += 2.0;
2388 geno_var += 4.0;
2389 } else {
2390 gsl_vector_set(geno, ci_test, 1.0);
2391 geno_mean += 1.0;
2392 geno_var += 1.0;
2393 }
2394 } else {
2395 if (b[2 * j + 1] == 1) {
2396 gsl_vector_set(geno, ci_test, 0.0);
2397 } else {
2398 gsl_vector_set(geno, ci_test, -9.0);
2399 n_miss++;
2400 }
2401 }
2402
2403 ci_test++;
2404 ci_total++;
2405 }
2406 }
2407
2408 geno_mean /= (double)(ni_test - n_miss);
2409 geno_var += geno_mean * geno_mean * (double)n_miss;
2410 geno_var /= (double)ni_test;
2411 geno_var -= geno_mean * geno_mean;
2412
2413 for (size_t i = 0; i < ni_test; ++i) {
2414 d = gsl_vector_get(geno, i);
2415 if (d == -9.0) {
2416 gsl_vector_set(geno, i, geno_mean);
2417 }
2418 }
2419
2420 gsl_vector_add_constant(geno, -1.0 * geno_mean);
2421
2422 gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
2423 d = gsl_vector_get(wz, ns_test);
2424 gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
2425
2426 ns_test++;
2427 }
2428 cout << endl;
2429
2430 gsl_vector_free(geno);
2431 gsl_vector_free(wz);
2432
2433 infile.close();
2434 infile.clear();
2435
2436 return true;
2437 }
2438
2439 // Read multiple genotype files and compute XWz.
MFILEXwz(const size_t mfile_mode,const string & file_mfile,const int display_pace,vector<int> & indicator_idv,vector<vector<int>> & mindicator_snp,const vector<size_t> & vec_cat,const gsl_vector * w,const gsl_vector * z,gsl_matrix * XWz)2440 bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
2441 const int display_pace, vector<int> &indicator_idv,
2442 vector<vector<int>> &mindicator_snp,
2443 const vector<size_t> &vec_cat, const gsl_vector *w,
2444 const gsl_vector *z, gsl_matrix *XWz) {
2445 debug_msg("entering");
2446 gsl_matrix_set_zero(XWz);
2447
2448 igzstream infile(file_mfile.c_str(), igzstream::in);
2449 if (!infile) {
2450 cout << "error! fail to open mfile file: " << file_mfile << endl;
2451 return false;
2452 }
2453
2454 string file_name;
2455 size_t l = 0, ns_test = 0;
2456
2457 while (!safeGetline(infile, file_name).eof()) {
2458 if (mfile_mode == 1) {
2459 file_name += ".bed";
2460 PlinkXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2461 vec_cat, w, z, ns_test, XWz);
2462 } else {
2463 BimbamXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2464 vec_cat, w, z, ns_test, XWz);
2465 }
2466
2467 l++;
2468 }
2469
2470 infile.close();
2471 infile.clear();
2472
2473 return true;
2474 }
2475
2476 // Read bimbam mean genotype file and compute X_i^TX_jWz.
BimbamXtXwz(const string & file_geno,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const gsl_matrix * XWz,size_t ns_test,gsl_matrix * XtXWz)2477 bool BimbamXtXwz(const string &file_geno, const int display_pace,
2478 vector<int> &indicator_idv, vector<int> &indicator_snp,
2479 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
2480 debug_msg("entering");
2481 igzstream infile(file_geno.c_str(), igzstream::in);
2482 if (!infile) {
2483 cout << "error reading genotype file:" << file_geno << endl;
2484 return false;
2485 }
2486
2487 string line;
2488 char *ch_ptr;
2489
2490 size_t n_miss;
2491 double d, geno_mean, geno_var;
2492
2493 size_t ni_test = XWz->size1;
2494 gsl_vector *geno = gsl_vector_alloc(ni_test);
2495 gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
2496
2497 for (size_t t = 0; t < indicator_snp.size(); ++t) {
2498 safeGetline(infile, line).eof();
2499 if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2500 ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
2501 }
2502 if (indicator_snp[t] == 0) {
2503 continue;
2504 }
2505
2506 ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
2507 ch_ptr = strtok_safe(NULL, " , \t");
2508 ch_ptr = strtok_safe(NULL, " , \t");
2509
2510 geno_mean = 0.0;
2511 n_miss = 0;
2512 geno_var = 0.0;
2513 gsl_vector_set_all(geno_miss, 0);
2514
2515 size_t j = 0;
2516 for (size_t i = 0; i < indicator_idv.size(); ++i) {
2517 if (indicator_idv[i] == 0) {
2518 continue;
2519 }
2520 ch_ptr = strtok_safe(NULL, " , \t");
2521 if (strcmp(ch_ptr, "NA") == 0) {
2522 gsl_vector_set(geno_miss, i, 0);
2523 n_miss++;
2524 } else {
2525 d = atof(ch_ptr);
2526 gsl_vector_set(geno, j, d);
2527 gsl_vector_set(geno_miss, j, 1);
2528 geno_mean += d;
2529 geno_var += d * d;
2530 }
2531 j++;
2532 }
2533
2534 geno_mean /= (double)(ni_test - n_miss);
2535 geno_var += geno_mean * geno_mean * (double)n_miss;
2536 geno_var /= (double)ni_test;
2537 geno_var -= geno_mean * geno_mean;
2538
2539 for (size_t i = 0; i < ni_test; ++i) {
2540 if (gsl_vector_get(geno_miss, i) == 0) {
2541 gsl_vector_set(geno, i, geno_mean);
2542 }
2543 }
2544
2545 gsl_vector_add_constant(geno, -1.0 * geno_mean);
2546
2547 for (size_t i = 0; i < XWz->size2; i++) {
2548 gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
2549 gsl_blas_ddot(geno, &XWz_col.vector, &d);
2550 gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
2551 }
2552
2553 ns_test++;
2554 }
2555
2556 cout << endl;
2557
2558 gsl_vector_free(geno);
2559 gsl_vector_free(geno_miss);
2560
2561 infile.close();
2562 infile.clear();
2563
2564 return true;
2565 }
2566
2567 // Read PLINK bed file and compute XWz.
PlinkXtXwz(const string & file_bed,const int display_pace,vector<int> & indicator_idv,vector<int> & indicator_snp,const gsl_matrix * XWz,size_t ns_test,gsl_matrix * XtXWz)2568 bool PlinkXtXwz(const string &file_bed, const int display_pace,
2569 vector<int> &indicator_idv, vector<int> &indicator_snp,
2570 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
2571 debug_msg("entering");
2572 ifstream infile(file_bed.c_str(), ios::binary);
2573 if (!infile) {
2574 cout << "error reading bed file:" << file_bed << endl;
2575 return false;
2576 }
2577
2578 char ch[1];
2579 bitset<8> b;
2580
2581 size_t n_miss, ci_total, ci_test;
2582 double d, geno_mean, geno_var;
2583
2584 size_t ni_test = XWz->size1;
2585 size_t ni_total = indicator_idv.size();
2586 gsl_vector *geno = gsl_vector_alloc(ni_test);
2587
2588 int n_bit;
2589
2590 // Calculate n_bit and c, the number of bit for each snp.
2591 if (ni_total % 4 == 0) {
2592 n_bit = ni_total / 4;
2593 } else {
2594 n_bit = ni_total / 4 + 1;
2595 }
2596
2597 // Print the first three magic numbers.
2598 for (int i = 0; i < 3; ++i) {
2599 infile.read(ch, 1);
2600 b = ch[0];
2601 }
2602
2603 for (size_t t = 0; t < indicator_snp.size(); ++t) {
2604 if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
2605 ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
2606 }
2607 if (indicator_snp[t] == 0) {
2608 continue;
2609 }
2610
2611 // n_bit, and 3 is the number of magic numbers.
2612 infile.seekg(t * n_bit + 3);
2613
2614 // Read genotypes.
2615 geno_mean = 0.0;
2616 n_miss = 0;
2617 ci_total = 0;
2618 geno_var = 0.0;
2619 ci_test = 0;
2620 for (int i = 0; i < n_bit; ++i) {
2621 infile.read(ch, 1);
2622 b = ch[0];
2623
2624 // Minor allele homozygous: 2.0; major: 0.0;
2625 for (size_t j = 0; j < 4; ++j) {
2626 if ((i == (n_bit - 1)) && ci_total == ni_total) {
2627 break;
2628 }
2629 if (indicator_idv[ci_total] == 0) {
2630 ci_total++;
2631 continue;
2632 }
2633
2634 if (b[2 * j] == 0) {
2635 if (b[2 * j + 1] == 0) {
2636 gsl_vector_set(geno, ci_test, 2.0);
2637 geno_mean += 2.0;
2638 geno_var += 4.0;
2639 } else {
2640 gsl_vector_set(geno, ci_test, 1.0);
2641 geno_mean += 1.0;
2642 geno_var += 1.0;
2643 }
2644 } else {
2645 if (b[2 * j + 1] == 1) {
2646 gsl_vector_set(geno, ci_test, 0.0);
2647 } else {
2648 gsl_vector_set(geno, ci_test, -9.0);
2649 n_miss++;
2650 }
2651 }
2652
2653 ci_test++;
2654 ci_total++;
2655 }
2656 }
2657
2658 geno_mean /= (double)(ni_test - n_miss);
2659 geno_var += geno_mean * geno_mean * (double)n_miss;
2660 geno_var /= (double)ni_test;
2661 geno_var -= geno_mean * geno_mean;
2662
2663 for (size_t i = 0; i < ni_test; ++i) {
2664 d = gsl_vector_get(geno, i);
2665 if (d == -9.0) {
2666 gsl_vector_set(geno, i, geno_mean);
2667 }
2668 }
2669
2670 gsl_vector_add_constant(geno, -1.0 * geno_mean);
2671
2672 for (size_t i = 0; i < XWz->size2; i++) {
2673 gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
2674 gsl_blas_ddot(geno, &XWz_col.vector, &d);
2675 gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
2676 }
2677
2678 ns_test++;
2679 }
2680 cout << endl;
2681
2682 gsl_vector_free(geno);
2683
2684 infile.close();
2685 infile.clear();
2686
2687 return true;
2688 }
2689
2690 // Read multiple genotype files and compute XWz.
MFILEXtXwz(const size_t mfile_mode,const string & file_mfile,const int display_pace,vector<int> & indicator_idv,vector<vector<int>> & mindicator_snp,const gsl_matrix * XWz,gsl_matrix * XtXWz)2691 bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
2692 const int display_pace, vector<int> &indicator_idv,
2693 vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
2694 gsl_matrix *XtXWz) {
2695 debug_msg("entering");
2696 gsl_matrix_set_zero(XtXWz);
2697
2698 igzstream infile(file_mfile.c_str(), igzstream::in);
2699 if (!infile) {
2700 cout << "error! fail to open mfile file: " << file_mfile << endl;
2701 return false;
2702 }
2703
2704 string file_name;
2705 size_t l = 0, ns_test = 0;
2706
2707 while (!safeGetline(infile, file_name).eof()) {
2708 if (mfile_mode == 1) {
2709 file_name += ".bed";
2710 PlinkXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], XWz,
2711 ns_test, XtXWz);
2712 } else {
2713 BimbamXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
2714 XWz, ns_test, XtXWz);
2715 }
2716
2717 l++;
2718 }
2719
2720 infile.close();
2721 infile.clear();
2722
2723 return true;
2724 }
2725
2726 // Compute confidence intervals from summary statistics.
CalcCIss(const gsl_matrix * Xz,const gsl_matrix * XWz,const gsl_matrix * XtXWz,const gsl_matrix * S_mat,const gsl_matrix * Svar_mat,const gsl_vector * w,const gsl_vector * z,const gsl_vector * s_vec,const vector<size_t> & vec_cat,const vector<double> & v_pve,vector<double> & v_se_pve,double & pve_total,double & se_pve_total,vector<double> & v_sigma2,vector<double> & v_se_sigma2,vector<double> & v_enrich,vector<double> & v_se_enrich)2727 void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
2728 const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
2729 const gsl_matrix *Svar_mat, const gsl_vector *w,
2730 const gsl_vector *z, const gsl_vector *s_vec,
2731 const vector<size_t> &vec_cat, const vector<double> &v_pve,
2732 vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
2733 vector<double> &v_sigma2, vector<double> &v_se_sigma2,
2734 vector<double> &v_enrich, vector<double> &v_se_enrich) {
2735 size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1;
2736
2737 // Set up matrices.
2738 gsl_vector *w_pve = gsl_vector_alloc(ns_test);
2739 gsl_vector *wz = gsl_vector_alloc(ns_test);
2740 gsl_vector *zwz = gsl_vector_alloc(n_vc);
2741 gsl_vector *zz = gsl_vector_alloc(n_vc);
2742 gsl_vector *Xz_pve = gsl_vector_alloc(ni_test);
2743 gsl_vector *WXtXWz = gsl_vector_alloc(ns_test);
2744
2745 gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
2746 gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
2747 gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
2748 gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
2749 gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
2750 gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
2751
2752 double d, s0, s1, s, s_pve, s_snp;
2753
2754 // Compute wz and zwz.
2755 gsl_vector_memcpy(wz, z);
2756 gsl_vector_mul(wz, w);
2757
2758 gsl_vector_set_zero(zwz);
2759 gsl_vector_set_zero(zz);
2760 for (size_t i = 0; i < w->size; i++) {
2761 d = gsl_vector_get(wz, i) * gsl_vector_get(z, i);
2762 d += gsl_vector_get(zwz, vec_cat[i]);
2763 gsl_vector_set(zwz, vec_cat[i], d);
2764
2765 d = gsl_vector_get(z, i) * gsl_vector_get(z, i);
2766 d += gsl_vector_get(zz, vec_cat[i]);
2767 gsl_vector_set(zz, vec_cat[i], d);
2768 }
2769
2770 // Compute wz, ve and Xz_pve.
2771 gsl_vector_set_zero(Xz_pve);
2772 s_pve = 0;
2773 s_snp = 0;
2774 for (size_t i = 0; i < n_vc; i++) {
2775 s_pve += v_pve[i];
2776 s_snp += gsl_vector_get(s_vec, i);
2777
2778 gsl_vector_const_view Xz_col = gsl_matrix_const_column(Xz, i);
2779 gsl_blas_daxpy(v_pve[i] / gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
2780 }
2781
2782 // Set up wpve vector.
2783 for (size_t i = 0; i < w->size; i++) {
2784 d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]);
2785 gsl_vector_set(w_pve, i, d);
2786 }
2787
2788 // Compute Vq (in qvar_mat).
2789 s0 = 1 - s_pve;
2790 for (size_t i = 0; i < n_vc; i++) {
2791 s0 += gsl_vector_get(zz, i) * v_pve[i] / gsl_vector_get(s_vec, i);
2792 }
2793
2794 for (size_t i = 0; i < n_vc; i++) {
2795 s1 = s0;
2796 s1 -= gsl_vector_get(zwz, i) * (1 - s_pve) / gsl_vector_get(s_vec, i);
2797
2798 gsl_vector_const_view XWz_col1 = gsl_matrix_const_column(XWz, i);
2799 gsl_vector_const_view XtXWz_col1 = gsl_matrix_const_column(XtXWz, i);
2800
2801 gsl_vector_memcpy(WXtXWz, &XtXWz_col1.vector);
2802 gsl_vector_mul(WXtXWz, w_pve);
2803
2804 gsl_blas_ddot(Xz_pve, &XWz_col1.vector, &d);
2805 s1 -= d / gsl_vector_get(s_vec, i);
2806
2807 for (size_t j = 0; j < n_vc; j++) {
2808 s = s1;
2809
2810 s -= gsl_vector_get(zwz, j) * (1 - s_pve) / gsl_vector_get(s_vec, j);
2811
2812 gsl_vector_const_view XWz_col2 = gsl_matrix_const_column(XWz, j);
2813 gsl_vector_const_view XtXWz_col2 = gsl_matrix_const_column(XtXWz, j);
2814
2815 gsl_blas_ddot(WXtXWz, &XtXWz_col2.vector, &d);
2816 s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j));
2817
2818 gsl_blas_ddot(&XWz_col1.vector, &XWz_col2.vector, &d);
2819 s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)) *
2820 (1 - s_pve);
2821
2822 gsl_blas_ddot(Xz_pve, &XWz_col2.vector, &d);
2823 s -= d / gsl_vector_get(s_vec, j);
2824
2825 gsl_matrix_set(qvar_mat, i, j, s);
2826 }
2827 }
2828
2829 d = (double)(ni_test - 1);
2830 gsl_matrix_scale(qvar_mat, 2.0 / (d * d * d));
2831
2832 // Calculate S^{-1}.
2833 gsl_matrix_memcpy(tmp_mat, S_mat);
2834 int sig;
2835 gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
2836 LUDecomp(tmp_mat, pmt, &sig);
2837 LUInvert(tmp_mat, pmt, Si_mat);
2838
2839 // Calculate variance for the estimates.
2840 for (size_t i = 0; i < n_vc; i++) {
2841 for (size_t j = i; j < n_vc; j++) {
2842 d = gsl_matrix_get(Svar_mat, i, j);
2843 d *= v_pve[i] * v_pve[j];
2844
2845 d += gsl_matrix_get(qvar_mat, i, j);
2846 gsl_matrix_set(Var_mat, i, j, d);
2847 if (i != j) {
2848 gsl_matrix_set(Var_mat, j, i, d);
2849 }
2850 }
2851 }
2852
2853 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
2854 tmp_mat);
2855 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
2856 Var_mat);
2857
2858 // Compute sigma2 per snp, enrich.
2859 v_sigma2.clear();
2860 v_enrich.clear();
2861 for (size_t i = 0; i < n_vc; i++) {
2862 v_sigma2.push_back(v_pve[i] / gsl_vector_get(s_vec, i));
2863 v_enrich.push_back(v_pve[i] / gsl_vector_get(s_vec, i) * s_snp / s_pve);
2864 }
2865
2866 // Compute se_pve, se_sigma2.
2867 for (size_t i = 0; i < n_vc; i++) {
2868 d = sqrt(gsl_matrix_get(Var_mat, i, i));
2869 v_se_pve.push_back(d);
2870 v_se_sigma2.push_back(d / gsl_vector_get(s_vec, i));
2871 }
2872
2873 // Compute pve_total, se_pve_total.
2874 pve_total = 0;
2875 for (size_t i = 0; i < n_vc; i++) {
2876 pve_total += v_pve[i];
2877 }
2878
2879 se_pve_total = 0;
2880 for (size_t i = 0; i < n_vc; i++) {
2881 for (size_t j = 0; j < n_vc; j++) {
2882 se_pve_total += gsl_matrix_get(Var_mat, i, j);
2883 }
2884 }
2885 se_pve_total = sqrt(se_pve_total);
2886
2887 // Compute se_enrich.
2888 gsl_matrix_set_identity(tmp_mat);
2889
2890 double d1;
2891 for (size_t i = 0; i < n_vc; i++) {
2892 d = v_pve[i] / s_pve;
2893 d1 = gsl_vector_get(s_vec, i);
2894 for (size_t j = 0; j < n_vc; j++) {
2895 if (i == j) {
2896 gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
2897 } else {
2898 gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
2899 }
2900 }
2901 }
2902 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
2903 tmp_mat1);
2904 gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
2905 VarEnrich_mat);
2906
2907 for (size_t i = 0; i < n_vc; i++) {
2908 d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
2909 v_se_enrich.push_back(d);
2910 }
2911
2912 cout << "pve = ";
2913 for (size_t i = 0; i < n_vc; i++) {
2914 cout << v_pve[i] << " ";
2915 }
2916 cout << endl;
2917
2918 cout << "se(pve) = ";
2919 for (size_t i = 0; i < n_vc; i++) {
2920 cout << v_se_pve[i] << " ";
2921 }
2922 cout << endl;
2923
2924 cout << "sigma2 per snp = ";
2925 for (size_t i = 0; i < n_vc; i++) {
2926 cout << v_sigma2[i] << " ";
2927 }
2928 cout << endl;
2929
2930 cout << "se(sigma2 per snp) = ";
2931 for (size_t i = 0; i < n_vc; i++) {
2932 cout << v_se_sigma2[i] << " ";
2933 }
2934 cout << endl;
2935
2936 cout << "enrichment = ";
2937 for (size_t i = 0; i < n_vc; i++) {
2938 cout << v_enrich[i] << " ";
2939 }
2940 cout << endl;
2941
2942 cout << "se(enrichment) = ";
2943 for (size_t i = 0; i < n_vc; i++) {
2944 cout << v_se_enrich[i] << " ";
2945 }
2946 cout << endl;
2947
2948 // Delete matrices.
2949 gsl_matrix_free(Si_mat);
2950 gsl_matrix_free(Var_mat);
2951 gsl_matrix_free(VarEnrich_mat);
2952 gsl_matrix_free(tmp_mat);
2953 gsl_matrix_free(tmp_mat1);
2954 gsl_matrix_free(qvar_mat);
2955
2956 gsl_vector_free(w_pve);
2957 gsl_vector_free(wz);
2958 gsl_vector_free(zwz);
2959 gsl_vector_free(WXtXWz);
2960 gsl_vector_free(Xz_pve);
2961
2962 return;
2963 }
2964