1 /*
2 Genome-wide Efficient Mixed Model Association (GEMMA)
3 Copyright (C) 2011-2017, Xiang Zhou
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include <fstream>
20 #include <iostream>
21 #include <sstream>
22
23 #include <algorithm>
24 #include <cmath>
25 #include <cstring>
26 #include <ctime>
27 #include <iomanip>
28 #include <iostream>
29 #include <stdio.h>
30 #include <stdlib.h>
31
32 #include "gsl/gsl_blas.h"
33 #include "gsl/gsl_cdf.h"
34 #include "gsl/gsl_eigen.h"
35 #include "gsl/gsl_linalg.h"
36 #include "gsl/gsl_matrix.h"
37 #include "gsl/gsl_randist.h"
38 #include "gsl/gsl_roots.h"
39 #include "gsl/gsl_vector.h"
40
41 #include "bslmm.h"
42 #include "lapack.h"
43 #include "lm.h"
44 #include "lmm.h"
45 #include "mathfunc.h"
46 #include "param.h"
47
48 using namespace std;
49
CopyFromParam(PARAM & cPar)50 void BSLMM::CopyFromParam(PARAM &cPar) {
51 a_mode = cPar.a_mode;
52 d_pace = cPar.d_pace;
53
54 file_bfile = cPar.file_bfile;
55 file_geno = cPar.file_geno;
56 file_out = cPar.file_out;
57 path_out = cPar.path_out;
58
59 l_min = cPar.h_min;
60 l_max = cPar.h_max;
61 n_region = cPar.n_region;
62 pve_null = cPar.pve_null;
63 pheno_mean = cPar.pheno_mean;
64
65 time_UtZ = 0.0;
66 time_Omega = 0.0;
67 n_accept = 0;
68
69 h_min = cPar.h_min;
70 h_max = cPar.h_max;
71 h_scale = cPar.h_scale;
72 rho_min = cPar.rho_min;
73 rho_max = cPar.rho_max;
74 rho_scale = cPar.rho_scale;
75 logp_min = cPar.logp_min;
76 logp_max = cPar.logp_max;
77 logp_scale = cPar.logp_scale;
78
79 s_min = cPar.s_min;
80 s_max = cPar.s_max;
81 w_step = cPar.w_step;
82 s_step = cPar.s_step;
83 r_pace = cPar.r_pace;
84 w_pace = cPar.w_pace;
85 n_mh = cPar.n_mh;
86 geo_mean = cPar.geo_mean;
87 // randseed = cPar.randseed;
88 gsl_r = cPar.gsl_r;
89 trace_G = cPar.trace_G;
90
91 ni_total = cPar.ni_total;
92 ns_total = cPar.ns_total;
93 ni_test = cPar.ni_test;
94 ns_test = cPar.ns_test;
95 n_cvt = cPar.n_cvt;
96
97 indicator_idv = cPar.indicator_idv;
98 indicator_snp = cPar.indicator_snp;
99 snpInfo = cPar.snpInfo;
100
101 return;
102 }
103
CopyToParam(PARAM & cPar)104 void BSLMM::CopyToParam(PARAM &cPar) {
105 cPar.time_UtZ = time_UtZ;
106 cPar.time_Omega = time_Omega;
107 cPar.time_Proposal = time_Proposal;
108 cPar.cHyp_initial = cHyp_initial;
109 cPar.n_accept = n_accept;
110 cPar.pheno_mean = pheno_mean;
111 // cPar.randseed = randseed;
112
113 return;
114 }
115
WriteBV(const gsl_vector * bv)116 void BSLMM::WriteBV(const gsl_vector *bv) {
117 string file_str;
118 file_str = path_out + "/" + file_out;
119 file_str += ".bv.txt";
120
121 ofstream outfile(file_str.c_str(), ofstream::out);
122 if (!outfile) {
123 cout << "error writing file: " << file_str.c_str() << endl;
124 return;
125 }
126
127 size_t t = 0;
128 for (size_t i = 0; i < ni_total; ++i) {
129 if (indicator_idv[i] == 0) {
130 outfile << "NA" << endl;
131 } else {
132 outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl;
133 t++;
134 }
135 }
136
137 outfile.clear();
138 outfile.close();
139 return;
140 }
141
WriteParam(vector<pair<double,double>> & beta_g,const gsl_vector * alpha,const size_t w)142 void BSLMM::WriteParam(vector<pair<double, double>> &beta_g,
143 const gsl_vector *alpha, const size_t w) {
144 string file_str;
145 file_str = path_out + "/" + file_out;
146 file_str += ".param.txt";
147
148 ofstream outfile(file_str.c_str(), ofstream::out);
149 if (!outfile) {
150 cout << "error writing file: " << file_str.c_str() << endl;
151 return;
152 }
153
154 outfile << "chr"
155 << "\t"
156 << "rs"
157 << "\t"
158 << "ps"
159 << "\t"
160 << "n_miss"
161 << "\t"
162 << "alpha"
163 << "\t"
164 << "beta"
165 << "\t"
166 << "gamma" << endl;
167
168 size_t t = 0;
169 for (size_t i = 0; i < ns_total; ++i) {
170 if (indicator_snp[i] == 0) {
171 continue;
172 }
173
174 outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
175 << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
176
177 outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
178 << "\t";
179 if (beta_g[t].second != 0) {
180 outfile << beta_g[t].first / beta_g[t].second << "\t"
181 << beta_g[t].second / (double)w << endl;
182 } else {
183 outfile << 0.0 << "\t" << 0.0 << endl;
184 }
185 t++;
186 }
187
188 outfile.clear();
189 outfile.close();
190 return;
191 }
192
WriteParam(const gsl_vector * alpha)193 void BSLMM::WriteParam(const gsl_vector *alpha) {
194 string file_str;
195 file_str = path_out + "/" + file_out;
196 file_str += ".param.txt";
197
198 ofstream outfile(file_str.c_str(), ofstream::out);
199 if (!outfile) {
200 cout << "error writing file: " << file_str.c_str() << endl;
201 return;
202 }
203
204 outfile << "chr"
205 << "\t"
206 << "rs"
207 << "\t"
208 << "ps"
209 << "\t"
210 << "n_miss"
211 << "\t"
212 << "alpha"
213 << "\t"
214 << "beta"
215 << "\t"
216 << "gamma" << endl;
217
218 size_t t = 0;
219 for (size_t i = 0; i < ns_total; ++i) {
220 if (indicator_snp[i] == 0) {
221 continue;
222 }
223
224 outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
225 << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
226 outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
227 << "\t";
228 outfile << 0.0 << "\t" << 0.0 << endl;
229 t++;
230 }
231
232 outfile.clear();
233 outfile.close();
234 return;
235 }
236
WriteResult(const int flag,const gsl_matrix * Result_hyp,const gsl_matrix * Result_gamma,const size_t w_col)237 void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp,
238 const gsl_matrix *Result_gamma, const size_t w_col) {
239 string file_gamma, file_hyp;
240 file_gamma = path_out + "/" + file_out;
241 file_gamma += ".gamma.txt";
242 file_hyp = path_out + "/" + file_out;
243 file_hyp += ".hyp.txt";
244
245 ofstream outfile_gamma, outfile_hyp;
246
247 if (flag == 0) {
248 outfile_gamma.open(file_gamma.c_str(), ofstream::out);
249 outfile_hyp.open(file_hyp.c_str(), ofstream::out);
250 if (!outfile_gamma) {
251 cout << "error writing file: " << file_gamma << endl;
252 return;
253 }
254 if (!outfile_hyp) {
255 cout << "error writing file: " << file_hyp << endl;
256 return;
257 }
258
259 outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl;
260
261 for (size_t i = 0; i < s_max; ++i) {
262 outfile_gamma << "s" << i << "\t";
263 }
264 outfile_gamma << endl;
265 } else {
266 outfile_gamma.open(file_gamma.c_str(), ofstream::app);
267 outfile_hyp.open(file_hyp.c_str(), ofstream::app);
268 if (!outfile_gamma) {
269 cout << "error writing file: " << file_gamma << endl;
270 return;
271 }
272 if (!outfile_hyp) {
273 cout << "error writing file: " << file_hyp << endl;
274 return;
275 }
276
277 size_t w;
278 if (w_col == 0) {
279 w = w_pace;
280 } else {
281 w = w_col;
282 }
283
284 for (size_t i = 0; i < w; ++i) {
285 outfile_hyp << scientific;
286 for (size_t j = 0; j < 4; ++j) {
287 outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j)
288 << "\t";
289 }
290 outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4))
291 << "\t";
292 outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t";
293 outfile_hyp << endl;
294 }
295
296 for (size_t i = 0; i < w; ++i) {
297 for (size_t j = 0; j < s_max; ++j) {
298 outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t";
299 }
300 outfile_gamma << endl;
301 }
302 }
303
304 outfile_hyp.close();
305 outfile_hyp.clear();
306 outfile_gamma.close();
307 outfile_gamma.clear();
308 return;
309 }
310
CalcPgamma(double * p_gamma)311 void BSLMM::CalcPgamma(double *p_gamma) {
312 double p, s = 0.0;
313 for (size_t i = 0; i < ns_test; ++i) {
314 p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) +
315 0.3 / (double)ns_test;
316 p_gamma[i] = p;
317 s += p;
318 }
319 for (size_t i = 0; i < ns_test; ++i) {
320 p = p_gamma[i];
321 p_gamma[i] = p / s;
322 }
323 return;
324 }
325
SetXgamma(gsl_matrix * Xgamma,const gsl_matrix * X,vector<size_t> & rank)326 void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X,
327 vector<size_t> &rank) {
328 size_t pos;
329 for (size_t i = 0; i < rank.size(); ++i) {
330 pos = mapRank2pos[rank[i]];
331 gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i);
332 gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos);
333 gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
334 }
335
336 return;
337 }
338
CalcPveLM(const gsl_matrix * UtXgamma,const gsl_vector * Uty,const double sigma_a2)339 double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
340 const double sigma_a2) {
341 double pve, var_y;
342
343 gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
344 gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2);
345 gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2);
346
347 gsl_matrix_set_identity(Omega);
348 gsl_matrix_scale(Omega, 1.0 / sigma_a2);
349
350 lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega);
351 gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
352
353 CholeskySolve(Omega, Xty, OiXty);
354
355 gsl_blas_ddot(Xty, OiXty, &pve);
356 gsl_blas_ddot(Uty, Uty, &var_y);
357
358 pve /= var_y;
359
360 gsl_matrix_free(Omega);
361 gsl_vector_free(Xty);
362 gsl_vector_free(OiXty);
363
364 return pve;
365 }
366
InitialMCMC(const gsl_matrix * UtX,const gsl_vector * Uty,vector<size_t> & rank,class HYPBSLMM & cHyp,vector<pair<size_t,double>> & pos_loglr)367 void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
368 vector<size_t> &rank, class HYPBSLMM &cHyp,
369 vector<pair<size_t, double>> &pos_loglr) {
370 double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1);
371
372 cHyp.n_gamma = 0;
373 for (size_t i = 0; i < pos_loglr.size(); ++i) {
374 if (2.0 * pos_loglr[i].second > q_genome) {
375 cHyp.n_gamma++;
376 }
377 }
378 if (cHyp.n_gamma < 10) {
379 cHyp.n_gamma = 10;
380 }
381
382 if (cHyp.n_gamma > s_max) {
383 cHyp.n_gamma = s_max;
384 }
385 if (cHyp.n_gamma < s_min) {
386 cHyp.n_gamma = s_min;
387 }
388
389 rank.clear();
390 for (size_t i = 0; i < cHyp.n_gamma; ++i) {
391 rank.push_back(i);
392 }
393
394 cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test);
395 cHyp.h = pve_null;
396
397 if (cHyp.logp == 0) {
398 cHyp.logp = -0.000001;
399 }
400 if (cHyp.h == 0) {
401 cHyp.h = 0.1;
402 }
403
404 gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma);
405 SetXgamma(UtXgamma, UtX, rank);
406 double sigma_a2;
407 if (trace_G != 0) {
408 sigma_a2 = cHyp.h * 1.0 /
409 (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
410 } else {
411 sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
412 }
413 if (sigma_a2 == 0) {
414 sigma_a2 = 0.025;
415 }
416 cHyp.rho = CalcPveLM(UtXgamma, Uty, sigma_a2) / cHyp.h;
417 gsl_matrix_free(UtXgamma);
418
419 if (cHyp.rho > 1.0) {
420 cHyp.rho = 1.0;
421 }
422
423 if (cHyp.h < h_min) {
424 cHyp.h = h_min;
425 }
426 if (cHyp.h > h_max) {
427 cHyp.h = h_max;
428 }
429 if (cHyp.rho < rho_min) {
430 cHyp.rho = rho_min;
431 }
432 if (cHyp.rho > rho_max) {
433 cHyp.rho = rho_max;
434 }
435 if (cHyp.logp < logp_min) {
436 cHyp.logp = logp_min;
437 }
438 if (cHyp.logp > logp_max) {
439 cHyp.logp = logp_max;
440 }
441
442 cout << "initial value of h = " << cHyp.h << endl;
443 cout << "initial value of rho = " << cHyp.rho << endl;
444 cout << "initial value of pi = " << exp(cHyp.logp) << endl;
445 cout << "initial value of |gamma| = " << cHyp.n_gamma << endl;
446
447 return;
448 }
449
CalcPosterior(const gsl_vector * Uty,const gsl_vector * K_eval,gsl_vector * Utu,gsl_vector * alpha_prime,class HYPBSLMM & cHyp)450 double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
451 gsl_vector *Utu, gsl_vector *alpha_prime,
452 class HYPBSLMM &cHyp) {
453 double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
454
455 gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size);
456 gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
457
458 double logpost = 0.0;
459 double d, ds, uy, Hi_yy = 0, logdet_H = 0.0;
460 for (size_t i = 0; i < ni_test; ++i) {
461 d = gsl_vector_get(K_eval, i) * sigma_b2;
462 ds = d / (d + 1.0);
463 d = 1.0 / (d + 1.0);
464 gsl_vector_set(weight_Hi, i, d);
465
466 logdet_H -= log(d);
467 uy = gsl_vector_get(Uty, i);
468 Hi_yy += d * uy * uy;
469
470 gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
471 }
472
473 // Sample tau.
474 double tau = 1.0;
475 if (a_mode == 11) {
476 tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy);
477 }
478
479 // Sample alpha.
480 gsl_vector_memcpy(alpha_prime, Uty);
481 gsl_vector_mul(alpha_prime, weight_Hi);
482 gsl_vector_scale(alpha_prime, sigma_b2);
483
484 // Sample u.
485 gsl_vector_memcpy(Utu, alpha_prime);
486 gsl_vector_mul(Utu, K_eval);
487 if (a_mode == 11) {
488 gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
489 }
490 gsl_vector_add(Utu, Utu_rand);
491
492 // For quantitative traits, calculate pve and ppe.
493 if (a_mode == 11) {
494 gsl_blas_ddot(Utu, Utu, &d);
495 cHyp.pve = d / (double)ni_test;
496 cHyp.pve /= cHyp.pve + 1.0 / tau;
497 cHyp.pge = 0.0;
498 }
499
500 // Calculate likelihood.
501 logpost = -0.5 * logdet_H;
502 if (a_mode == 11) {
503 logpost -= 0.5 * (double)ni_test * log(Hi_yy);
504 } else {
505 logpost -= 0.5 * Hi_yy;
506 }
507
508 logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
509 ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
510
511 gsl_vector_free(Utu_rand);
512 gsl_vector_free(weight_Hi);
513
514 return logpost;
515 }
516
CalcPosterior(const gsl_matrix * UtXgamma,const gsl_vector * Uty,const gsl_vector * K_eval,gsl_vector * UtXb,gsl_vector * Utu,gsl_vector * alpha_prime,gsl_vector * beta,class HYPBSLMM & cHyp)517 double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
518 const gsl_vector *K_eval, gsl_vector *UtXb,
519 gsl_vector *Utu, gsl_vector *alpha_prime,
520 gsl_vector *beta, class HYPBSLMM &cHyp) {
521 clock_t time_start;
522
523 double sigma_a2 = cHyp.h * cHyp.rho /
524 (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
525 double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
526
527 double logpost = 0.0;
528 double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
529
530 gsl_matrix *UtXgamma_eval =
531 gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
532 gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
533 gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
534 gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
535 gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1);
536 gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
537
538 gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
539
540 logdet_H = 0.0;
541 P_yy = 0.0;
542 for (size_t i = 0; i < ni_test; ++i) {
543 gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
544 d = gsl_vector_get(K_eval, i) * sigma_b2;
545 ds = d / (d + 1.0);
546 d = 1.0 / (d + 1.0);
547 gsl_vector_set(weight_Hi, i, d);
548
549 logdet_H -= log(d);
550 uy = gsl_vector_get(Uty, i);
551 P_yy += d * uy * uy;
552 gsl_vector_scale(&UtXgamma_row.vector, d);
553
554 gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
555 }
556
557 // Calculate Omega.
558 gsl_matrix_set_identity(Omega);
559
560 time_start = clock();
561 lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
562 Omega);
563 time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
564
565 // Calculate beta_hat.
566 gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
567
568 logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
569
570 gsl_vector_scale(beta_hat, sigma_a2);
571
572 gsl_blas_ddot(XtHiy, beta_hat, &d);
573 P_yy -= d;
574
575 // Sample tau.
576 double tau = 1.0;
577 if (a_mode == 11) {
578 tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
579 }
580
581 // Sample beta.
582 for (size_t i = 0; i < beta->size; i++) {
583 d = gsl_ran_gaussian(gsl_r, 1);
584 gsl_vector_set(beta, i, d);
585 }
586 gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
587
588 // This computes inv(L^T(Omega)) %*% beta.
589 gsl_vector_scale(beta, sqrt(sigma_a2 / tau));
590 gsl_vector_add(beta, beta_hat);
591 gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
592
593 // Sample alpha.
594 gsl_vector_memcpy(alpha_prime, Uty);
595 gsl_vector_sub(alpha_prime, UtXb);
596 gsl_vector_mul(alpha_prime, weight_Hi);
597 gsl_vector_scale(alpha_prime, sigma_b2);
598
599 // Sample u.
600 gsl_vector_memcpy(Utu, alpha_prime);
601 gsl_vector_mul(Utu, K_eval);
602
603 if (a_mode == 11) {
604 gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
605 }
606 gsl_vector_add(Utu, Utu_rand);
607
608 // For quantitative traits, calculate pve and pge.
609 if (a_mode == 11) {
610 gsl_blas_ddot(UtXb, UtXb, &d);
611 cHyp.pge = d / (double)ni_test;
612
613 gsl_blas_ddot(Utu, Utu, &d);
614 cHyp.pve = cHyp.pge + d / (double)ni_test;
615
616 if (cHyp.pve == 0) {
617 cHyp.pge = 0.0;
618 } else {
619 cHyp.pge /= cHyp.pve;
620 }
621 cHyp.pve /= cHyp.pve + 1.0 / tau;
622 }
623
624 gsl_matrix_free(UtXgamma_eval);
625 gsl_matrix_free(Omega);
626 gsl_vector_free(XtHiy);
627 gsl_vector_free(beta_hat);
628 gsl_vector_free(Utu_rand);
629 gsl_vector_free(weight_Hi);
630
631 logpost = -0.5 * logdet_H - 0.5 * logdet_O;
632 if (a_mode == 11) {
633 logpost -= 0.5 * (double)ni_test * log(P_yy);
634 } else {
635 logpost -= 0.5 * P_yy;
636 }
637 logpost +=
638 ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
639 ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
640
641 return logpost;
642 }
643
644 // Calculate pve and pge, and calculate z_hat for case-control data.
CalcCC_PVEnZ(const gsl_matrix * U,const gsl_vector * Utu,gsl_vector * z_hat,class HYPBSLMM & cHyp)645 void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
646 gsl_vector *z_hat, class HYPBSLMM &cHyp) {
647 double d;
648
649 gsl_blas_ddot(Utu, Utu, &d);
650 cHyp.pve = d / (double)ni_test;
651
652 gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
653
654 cHyp.pve /= cHyp.pve + 1.0;
655 cHyp.pge = 0.0;
656
657 return;
658 }
659
660 // Calculate pve and pge, and calculate z_hat for case-control data.
CalcCC_PVEnZ(const gsl_matrix * U,const gsl_vector * UtXb,const gsl_vector * Utu,gsl_vector * z_hat,class HYPBSLMM & cHyp)661 void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
662 const gsl_vector *Utu, gsl_vector *z_hat,
663 class HYPBSLMM &cHyp) {
664 double d;
665 gsl_vector *UtXbU = gsl_vector_alloc(Utu->size);
666
667 gsl_blas_ddot(UtXb, UtXb, &d);
668 cHyp.pge = d / (double)ni_test;
669
670 gsl_blas_ddot(Utu, Utu, &d);
671 cHyp.pve = cHyp.pge + d / (double)ni_test;
672
673 gsl_vector_memcpy(UtXbU, Utu);
674 gsl_vector_add(UtXbU, UtXb);
675 gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
676
677 if (cHyp.pve == 0) {
678 cHyp.pge = 0.0;
679 } else {
680 cHyp.pge /= cHyp.pve;
681 }
682
683 cHyp.pve /= cHyp.pve + 1.0;
684
685 gsl_vector_free(UtXbU);
686 return;
687 }
688
SampleZ(const gsl_vector * y,const gsl_vector * z_hat,gsl_vector * z)689 void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat,
690 gsl_vector *z) {
691 double d1, d2, z_rand = 0.0;
692 for (size_t i = 0; i < z->size; ++i) {
693 d1 = gsl_vector_get(y, i);
694 d2 = gsl_vector_get(z_hat, i);
695
696 // y is centered for case control studies.
697 if (d1 <= 0.0) {
698
699 // Control, right truncated.
700 do {
701 z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
702 } while (z_rand > 0.0);
703 } else {
704 do {
705 z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
706 } while (z_rand < 0.0);
707 }
708
709 gsl_vector_set(z, i, z_rand);
710 }
711
712 return;
713 }
714
ProposeHnRho(const class HYPBSLMM & cHyp_old,class HYPBSLMM & cHyp_new,const size_t & repeat)715 double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old,
716 class HYPBSLMM &cHyp_new, const size_t &repeat) {
717
718 double h = cHyp_old.h, rho = cHyp_old.rho;
719
720 double d_h = (h_max - h_min) * h_scale,
721 d_rho = (rho_max - rho_min) * rho_scale;
722
723 for (size_t i = 0; i < repeat; ++i) {
724 h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h;
725 if (h < h_min) {
726 h = 2 * h_min - h;
727 }
728 if (h > h_max) {
729 h = 2 * h_max - h;
730 }
731
732 rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho;
733 if (rho < rho_min) {
734 rho = 2 * rho_min - rho;
735 }
736 if (rho > rho_max) {
737 rho = 2 * rho_max - rho;
738 }
739 }
740 cHyp_new.h = h;
741 cHyp_new.rho = rho;
742 return 0.0;
743 }
744
ProposePi(const class HYPBSLMM & cHyp_old,class HYPBSLMM & cHyp_new,const size_t & repeat)745 double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old,
746 class HYPBSLMM &cHyp_new, const size_t &repeat) {
747 double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp;
748 double log_ratio = 0.0;
749
750 double d_logp = min(0.1, (logp_max - logp_min) * logp_scale);
751
752 for (size_t i = 0; i < repeat; ++i) {
753 logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp;
754 if (logp_new < logp_min) {
755 logp_new = 2 * logp_min - logp_new;
756 }
757 if (logp_new > logp_max) {
758 logp_new = 2 * logp_max - logp_new;
759 }
760 log_ratio += logp_new - logp_old;
761 logp_old = logp_new;
762 }
763 cHyp_new.logp = logp_new;
764
765 return log_ratio;
766 }
767
comp_vec(size_t a,size_t b)768 bool comp_vec(size_t a, size_t b) { return (a < b); }
769
ProposeGamma(const vector<size_t> & rank_old,vector<size_t> & rank_new,const double * p_gamma,const class HYPBSLMM & cHyp_old,class HYPBSLMM & cHyp_new,const size_t & repeat)770 double BSLMM::ProposeGamma(const vector<size_t> &rank_old,
771 vector<size_t> &rank_new, const double *p_gamma,
772 const class HYPBSLMM &cHyp_old,
773 class HYPBSLMM &cHyp_new, const size_t &repeat) {
774 map<size_t, int> mapRank2in;
775 size_t r;
776 double unif, logp = 0.0;
777 int flag_gamma;
778 size_t r_add, r_remove, col_id;
779
780 rank_new.clear();
781 if (cHyp_old.n_gamma != rank_old.size()) {
782 cout << "size wrong" << endl;
783 }
784
785 if (cHyp_old.n_gamma != 0) {
786 for (size_t i = 0; i < rank_old.size(); ++i) {
787 r = rank_old[i];
788 rank_new.push_back(r);
789 mapRank2in[r] = 1;
790 }
791 }
792 cHyp_new.n_gamma = cHyp_old.n_gamma;
793
794 for (size_t i = 0; i < repeat; ++i) {
795 unif = gsl_rng_uniform(gsl_r);
796
797 if (unif < 0.40 && cHyp_new.n_gamma < s_max) {
798 flag_gamma = 1;
799 } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) {
800 flag_gamma = 2;
801 } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 &&
802 cHyp_new.n_gamma < ns_test) {
803 flag_gamma = 3;
804 } else {
805 flag_gamma = 4;
806 }
807
808 if (flag_gamma == 1) {
809
810 // Add a SNP.
811 do {
812 r_add = gsl_ran_discrete(gsl_r, gsl_t);
813 } while (mapRank2in.count(r_add) != 0);
814
815 double prob_total = 1.0;
816 for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
817 r = rank_new[i];
818 prob_total -= p_gamma[r];
819 }
820
821 mapRank2in[r_add] = 1;
822 rank_new.push_back(r_add);
823 cHyp_new.n_gamma++;
824 logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma);
825 } else if (flag_gamma == 2) {
826
827 // Delete a SNP.
828 col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
829 r_remove = rank_new[col_id];
830
831 double prob_total = 1.0;
832 for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
833 r = rank_new[i];
834 prob_total -= p_gamma[r];
835 }
836 prob_total += p_gamma[r_remove];
837
838 mapRank2in.erase(r_remove);
839 rank_new.erase(rank_new.begin() + col_id);
840 logp +=
841 log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma);
842 cHyp_new.n_gamma--;
843 } else if (flag_gamma == 3) {
844
845 // Switch a SNP.
846 col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
847 r_remove = rank_new[col_id];
848
849 // Be careful with the proposal.
850 do {
851 r_add = gsl_ran_discrete(gsl_r, gsl_t);
852 } while (mapRank2in.count(r_add) != 0);
853
854 double prob_total = 1.0;
855 for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
856 r = rank_new[i];
857 prob_total -= p_gamma[r];
858 }
859
860 logp += log(p_gamma[r_remove] /
861 (prob_total + p_gamma[r_remove] - p_gamma[r_add]));
862 logp -= log(p_gamma[r_add] / prob_total);
863
864 mapRank2in.erase(r_remove);
865 mapRank2in[r_add] = 1;
866 rank_new.erase(rank_new.begin() + col_id);
867 rank_new.push_back(r_add);
868 } else {
869 logp += 0;
870 } // Do not change.
871 }
872
873 stable_sort(rank_new.begin(), rank_new.end(), comp_vec);
874
875 mapRank2in.clear();
876 return logp;
877 }
878
comp_lr(pair<size_t,double> a,pair<size_t,double> b)879 bool comp_lr(pair<size_t, double> a, pair<size_t, double> b) {
880 return (a.second > b.second);
881 }
882
883 // If a_mode==13 then Uty==y.
MCMC(const gsl_matrix * U,const gsl_matrix * UtX,const gsl_vector * Uty,const gsl_vector * K_eval,const gsl_vector * y)884 void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX,
885 const gsl_vector *Uty, const gsl_vector *K_eval,
886 const gsl_vector *y) {
887 clock_t time_start;
888
889 class HYPBSLMM cHyp_old, cHyp_new;
890
891 gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
892 gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
893
894 gsl_vector *alpha_prime = gsl_vector_alloc(ni_test);
895 gsl_vector *alpha_new = gsl_vector_alloc(ni_test);
896 gsl_vector *alpha_old = gsl_vector_alloc(ni_test);
897 gsl_vector *Utu = gsl_vector_alloc(ni_test);
898 gsl_vector *Utu_new = gsl_vector_alloc(ni_test);
899 gsl_vector *Utu_old = gsl_vector_alloc(ni_test);
900
901 gsl_vector *UtXb_new = gsl_vector_alloc(ni_test);
902 gsl_vector *UtXb_old = gsl_vector_alloc(ni_test);
903
904 gsl_vector *z_hat = gsl_vector_alloc(ni_test);
905 gsl_vector *z = gsl_vector_alloc(ni_test);
906 gsl_vector *Utz = gsl_vector_alloc(ni_test);
907
908 gsl_vector_memcpy(Utz, Uty);
909
910 double logPost_new, logPost_old;
911 double logMHratio;
912 double mean_z = 0.0;
913
914 gsl_matrix_set_zero(Result_gamma);
915 gsl_vector_set_zero(Utu);
916 gsl_vector_set_zero(alpha_prime);
917 if (a_mode == 13) {
918 pheno_mean = 0.0;
919 }
920
921 vector<pair<double, double>> beta_g;
922 for (size_t i = 0; i < ns_test; i++) {
923 beta_g.push_back(make_pair(0.0, 0.0));
924 }
925
926 vector<size_t> rank_new, rank_old;
927 vector<double> beta_new, beta_old;
928
929 vector<pair<size_t, double>> pos_loglr;
930
931 time_start = clock();
932 MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
933 time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
934
935 stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
936 for (size_t i = 0; i < ns_test; ++i) {
937 mapRank2pos[i] = pos_loglr[i].first;
938 }
939
940 // Calculate proposal distribution for gamma (unnormalized),
941 // and set up gsl_r and gsl_t.
942
943 double *p_gamma = new double[ns_test];
944 CalcPgamma(p_gamma);
945
946 gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
947
948 // Initial parameters.
949 InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr);
950
951 cHyp_initial = cHyp_old;
952
953 if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
954 logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
955
956 beta_old.clear();
957 for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
958 beta_old.push_back(0);
959 }
960 } else {
961 gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
962 gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
963 SetXgamma(UtXgamma, UtX, rank_old);
964 logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
965 alpha_old, beta, cHyp_old);
966
967 beta_old.clear();
968 for (size_t i = 0; i < beta->size; ++i) {
969 beta_old.push_back(gsl_vector_get(beta, i));
970 }
971 gsl_matrix_free(UtXgamma);
972 gsl_vector_free(beta);
973 }
974
975 // Calculate centered z_hat, and pve.
976 if (a_mode == 13) {
977 time_start = clock();
978 if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
979 CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
980 } else {
981 CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
982 }
983 time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
984 }
985
986 // Start MCMC.
987 int accept;
988 size_t total_step = w_step + s_step;
989 size_t w = 0, w_col, pos;
990 size_t repeat = 0;
991
992 for (size_t t = 0; t < total_step; ++t) {
993 if (t % d_pace == 0 || t == total_step - 1) {
994 ProgressBar("Running MCMC ", t, total_step - 1,
995 (double)n_accept / (double)(t * n_mh + 1));
996 }
997
998 if (a_mode == 13) {
999 SampleZ(y, z_hat, z);
1000 mean_z = CenterVector(z);
1001
1002 time_start = clock();
1003 gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz);
1004 time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1005
1006 // First proposal.
1007 if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
1008 logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
1009 beta_old.clear();
1010 for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
1011 beta_old.push_back(0);
1012 }
1013 } else {
1014 gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
1015 gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
1016 SetXgamma(UtXgamma, UtX, rank_old);
1017 logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
1018 alpha_old, beta, cHyp_old);
1019
1020 beta_old.clear();
1021 for (size_t i = 0; i < beta->size; ++i) {
1022 beta_old.push_back(gsl_vector_get(beta, i));
1023 }
1024 gsl_matrix_free(UtXgamma);
1025 gsl_vector_free(beta);
1026 }
1027 }
1028
1029 // M-H steps.
1030 for (size_t i = 0; i < n_mh; ++i) {
1031 if (gsl_rng_uniform(gsl_r) < 0.33) {
1032 repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
1033 } else {
1034 repeat = 1;
1035 }
1036
1037 logMHratio = 0.0;
1038 logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
1039 logMHratio +=
1040 ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
1041 logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
1042
1043 if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) {
1044 logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new);
1045 beta_new.clear();
1046 for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
1047 beta_new.push_back(0);
1048 }
1049 } else {
1050 gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma);
1051 gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma);
1052 SetXgamma(UtXgamma, UtX, rank_new);
1053 logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new,
1054 alpha_new, beta, cHyp_new);
1055 beta_new.clear();
1056 for (size_t i = 0; i < beta->size; ++i) {
1057 beta_new.push_back(gsl_vector_get(beta, i));
1058 }
1059 gsl_matrix_free(UtXgamma);
1060 gsl_vector_free(beta);
1061 }
1062
1063 logMHratio += logPost_new - logPost_old;
1064
1065 if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
1066 accept = 1;
1067 n_accept++;
1068 } else {
1069 accept = 0;
1070 }
1071
1072 if (accept == 1) {
1073 logPost_old = logPost_new;
1074 rank_old.clear();
1075 beta_old.clear();
1076 if (rank_new.size() != 0) {
1077 for (size_t i = 0; i < rank_new.size(); ++i) {
1078 rank_old.push_back(rank_new[i]);
1079 beta_old.push_back(beta_new[i]);
1080 }
1081 }
1082 cHyp_old = cHyp_new;
1083 gsl_vector_memcpy(alpha_old, alpha_new);
1084 gsl_vector_memcpy(UtXb_old, UtXb_new);
1085 gsl_vector_memcpy(Utu_old, Utu_new);
1086 } else {
1087 cHyp_new = cHyp_old;
1088 }
1089 }
1090
1091 // Calculate z_hat, and pve.
1092 if (a_mode == 13) {
1093 time_start = clock();
1094 if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
1095 CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
1096 } else {
1097 CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
1098 }
1099
1100 // Sample mu and update z_hat.
1101 gsl_vector_sub(z, z_hat);
1102 mean_z += CenterVector(z);
1103 mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
1104 gsl_vector_add_constant(z_hat, mean_z);
1105
1106 time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1107 }
1108
1109 // Save data.
1110 if (t < w_step) {
1111 continue;
1112 } else {
1113 if (t % r_pace == 0) {
1114 w_col = w % w_pace;
1115 if (w_col == 0) {
1116 if (w == 0) {
1117 WriteResult(0, Result_hyp, Result_gamma, w_col);
1118 } else {
1119 WriteResult(1, Result_hyp, Result_gamma, w_col);
1120 gsl_matrix_set_zero(Result_hyp);
1121 gsl_matrix_set_zero(Result_gamma);
1122 }
1123 }
1124
1125 gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
1126 gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
1127 gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
1128 gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
1129 gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
1130 gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
1131
1132 for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
1133 pos = mapRank2pos[rank_old[i]] + 1;
1134
1135 gsl_matrix_set(Result_gamma, w_col, i, pos);
1136
1137 beta_g[pos - 1].first += beta_old[i];
1138 beta_g[pos - 1].second += 1.0;
1139 }
1140
1141 gsl_vector_add(alpha_prime, alpha_old);
1142 gsl_vector_add(Utu, Utu_old);
1143
1144 if (a_mode == 13) {
1145 pheno_mean += mean_z;
1146 }
1147
1148 w++;
1149 }
1150 }
1151 }
1152 cout << endl;
1153
1154 w_col = w % w_pace;
1155 WriteResult(1, Result_hyp, Result_gamma, w_col);
1156
1157 gsl_matrix_free(Result_hyp);
1158 gsl_matrix_free(Result_gamma);
1159
1160 gsl_vector_free(z_hat);
1161 gsl_vector_free(z);
1162 gsl_vector_free(Utz);
1163 gsl_vector_free(UtXb_new);
1164 gsl_vector_free(UtXb_old);
1165 gsl_vector_free(alpha_new);
1166 gsl_vector_free(alpha_old);
1167 gsl_vector_free(Utu_new);
1168 gsl_vector_free(Utu_old);
1169
1170 gsl_vector_scale(alpha_prime, 1.0 / (double)w);
1171 gsl_vector_scale(Utu, 1.0 / (double)w);
1172 if (a_mode == 13) {
1173 pheno_mean /= (double)w;
1174 }
1175
1176 gsl_vector *alpha = gsl_vector_alloc(ns_test);
1177 gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0,
1178 alpha);
1179 WriteParam(beta_g, alpha, w);
1180 gsl_vector_free(alpha);
1181
1182 gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
1183 WriteBV(alpha_prime);
1184
1185 gsl_vector_free(alpha_prime);
1186 gsl_vector_free(Utu);
1187
1188 delete[] p_gamma;
1189 beta_g.clear();
1190
1191 return;
1192 }
1193
RidgeR(const gsl_matrix * U,const gsl_matrix * UtX,const gsl_vector * Uty,const gsl_vector * eval,const double lambda)1194 void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
1195 const gsl_vector *Uty, const gsl_vector *eval,
1196 const double lambda) {
1197 gsl_vector *beta = gsl_vector_alloc(UtX->size2);
1198 gsl_vector *H_eval = gsl_vector_alloc(Uty->size);
1199 gsl_vector *bv = gsl_vector_alloc(Uty->size);
1200
1201 gsl_vector_memcpy(H_eval, eval);
1202 gsl_vector_scale(H_eval, lambda);
1203 gsl_vector_add_constant(H_eval, 1.0);
1204
1205 gsl_vector_memcpy(bv, Uty);
1206 gsl_vector_div(bv, H_eval);
1207
1208 gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta);
1209 gsl_vector_add_constant(H_eval, -1.0);
1210 gsl_vector_mul(H_eval, bv);
1211 gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv);
1212
1213 WriteParam(beta);
1214 WriteBV(bv);
1215
1216 gsl_vector_free(H_eval);
1217 gsl_vector_free(beta);
1218 gsl_vector_free(bv);
1219
1220 return;
1221 }
1222
1223 // Below fits MCMC for rho=1.
CalcXtX(const gsl_matrix * X,const gsl_vector * y,const size_t s_size,gsl_matrix * XtX,gsl_vector * Xty)1224 void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y,
1225 const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
1226 time_t time_start = clock();
1227 gsl_matrix_const_view X_sub =
1228 gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
1229 gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
1230 gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size);
1231
1232 lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0,
1233 &XtX_sub.matrix);
1234 gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
1235
1236 time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1237
1238 return;
1239 }
1240
SetXgamma(const gsl_matrix * X,const gsl_matrix * X_old,const gsl_matrix * XtX_old,const gsl_vector * Xty_old,const gsl_vector * y,const vector<size_t> & rank_old,const vector<size_t> & rank_new,gsl_matrix * X_new,gsl_matrix * XtX_new,gsl_vector * Xty_new)1241 void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
1242 const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
1243 const gsl_vector *y, const vector<size_t> &rank_old,
1244 const vector<size_t> &rank_new, gsl_matrix *X_new,
1245 gsl_matrix *XtX_new, gsl_vector *Xty_new) {
1246 double d;
1247
1248 // rank_old and rank_new are sorted already inside PorposeGamma
1249 // calculate vectors rank_remove and rank_add.
1250 // make sure that v_size is larger than repeat.
1251 size_t v_size = 20;
1252 vector<size_t> rank_remove(v_size), rank_add(v_size),
1253 rank_union(s_max + v_size);
1254 vector<size_t>::iterator it;
1255
1256 it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
1257 rank_new.end(), rank_remove.begin());
1258 rank_remove.resize(it - rank_remove.begin());
1259
1260 it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(),
1261 rank_old.end(), rank_add.begin());
1262 rank_add.resize(it - rank_add.begin());
1263
1264 it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(),
1265 rank_old.end(), rank_union.begin());
1266 rank_union.resize(it - rank_union.begin());
1267
1268 // Map rank_remove and rank_add.
1269 map<size_t, int> mapRank2in_remove, mapRank2in_add;
1270 for (size_t i = 0; i < rank_remove.size(); i++) {
1271 mapRank2in_remove[rank_remove[i]] = 1;
1272 }
1273 for (size_t i = 0; i < rank_add.size(); i++) {
1274 mapRank2in_add[rank_add[i]] = 1;
1275 }
1276
1277 // Obtain the subset of matrix/vector.
1278 gsl_matrix_const_view Xold_sub =
1279 gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
1280 gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix(
1281 XtX_old, 0, 0, rank_old.size(), rank_old.size());
1282 gsl_vector_const_view Xtyold_sub =
1283 gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
1284
1285 gsl_matrix_view Xnew_sub =
1286 gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
1287 gsl_matrix_view XtXnew_sub =
1288 gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
1289 gsl_vector_view Xtynew_sub =
1290 gsl_vector_subvector(Xty_new, 0, rank_new.size());
1291
1292 // Get X_new and calculate XtX_new.
1293 if (rank_remove.size() == 0 && rank_add.size() == 0) {
1294 gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix);
1295 gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix);
1296 gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector);
1297 } else {
1298 size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag;
1299 if (rank_add.size() == 0) {
1300 i_old = 0;
1301 i_new = 0;
1302 for (size_t i = 0; i < rank_union.size(); i++) {
1303 if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
1304 i_old++;
1305 continue;
1306 }
1307
1308 gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
1309 gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old);
1310 gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
1311
1312 d = gsl_vector_get(Xty_old, i_old);
1313 gsl_vector_set(Xty_new, i_new, d);
1314
1315 j_old = i_old;
1316 j_new = i_new;
1317 for (size_t j = i; j < rank_union.size(); j++) {
1318 if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
1319 j_old++;
1320 continue;
1321 }
1322
1323 d = gsl_matrix_get(XtX_old, i_old, j_old);
1324
1325 gsl_matrix_set(XtX_new, i_new, j_new, d);
1326 if (i_new != j_new) {
1327 gsl_matrix_set(XtX_new, j_new, i_new, d);
1328 }
1329
1330 j_old++;
1331 j_new++;
1332 }
1333 i_old++;
1334 i_new++;
1335 }
1336 } else {
1337 gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size());
1338 gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2);
1339 gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2);
1340 gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2);
1341
1342 // Get X_add.
1343 SetXgamma(X_add, X, rank_add);
1344
1345 // Get t(X_add)X_add and t(X_add)X_temp.
1346 clock_t time_start = clock();
1347
1348 // Somehow the lapack_dgemm does not work here.
1349 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa);
1350 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao);
1351 gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add);
1352
1353 time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1354
1355 // Save to X_new, XtX_new and Xty_new.
1356 i_old = 0;
1357 i_new = 0;
1358 i_add = 0;
1359 for (size_t i = 0; i < rank_union.size(); i++) {
1360 if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
1361 i_old++;
1362 continue;
1363 }
1364 if (mapRank2in_add.count(rank_new[i_new]) != 0) {
1365 i_flag = 1;
1366 } else {
1367 i_flag = 0;
1368 }
1369
1370 gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
1371 if (i_flag == 1) {
1372 gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add);
1373 gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
1374 } else {
1375 gsl_vector_const_view Xcopy_col =
1376 gsl_matrix_const_column(X_old, i_old);
1377 gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
1378 }
1379
1380 if (i_flag == 1) {
1381 d = gsl_vector_get(Xty_add, i_add);
1382 } else {
1383 d = gsl_vector_get(Xty_old, i_old);
1384 }
1385 gsl_vector_set(Xty_new, i_new, d);
1386
1387 j_old = i_old;
1388 j_new = i_new;
1389 j_add = i_add;
1390 for (size_t j = i; j < rank_union.size(); j++) {
1391 if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
1392 j_old++;
1393 continue;
1394 }
1395 if (mapRank2in_add.count(rank_new[j_new]) != 0) {
1396 j_flag = 1;
1397 } else {
1398 j_flag = 0;
1399 }
1400
1401 if (i_flag == 1 && j_flag == 1) {
1402 d = gsl_matrix_get(XtX_aa, i_add, j_add);
1403 } else if (i_flag == 1) {
1404 d = gsl_matrix_get(XtX_ao, i_add, j_old);
1405 } else if (j_flag == 1) {
1406 d = gsl_matrix_get(XtX_ao, j_add, i_old);
1407 } else {
1408 d = gsl_matrix_get(XtX_old, i_old, j_old);
1409 }
1410
1411 gsl_matrix_set(XtX_new, i_new, j_new, d);
1412 if (i_new != j_new) {
1413 gsl_matrix_set(XtX_new, j_new, i_new, d);
1414 }
1415
1416 j_new++;
1417 if (j_flag == 1) {
1418 j_add++;
1419 } else {
1420 j_old++;
1421 }
1422 }
1423 i_new++;
1424 if (i_flag == 1) {
1425 i_add++;
1426 } else {
1427 i_old++;
1428 }
1429 }
1430
1431 gsl_matrix_free(X_add);
1432 gsl_matrix_free(XtX_aa);
1433 gsl_matrix_free(XtX_ao);
1434 gsl_vector_free(Xty_add);
1435 }
1436 }
1437
1438 rank_remove.clear();
1439 rank_add.clear();
1440 rank_union.clear();
1441 mapRank2in_remove.clear();
1442 mapRank2in_add.clear();
1443
1444 return;
1445 }
1446
CalcPosterior(const double yty,class HYPBSLMM & cHyp)1447 double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) {
1448 double logpost = 0.0;
1449
1450 // For quantitative traits, calculate pve and pge.
1451 // Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
1452 if (a_mode == 11) {
1453 cHyp.pve = 0.0;
1454 cHyp.pge = 1.0;
1455 }
1456
1457 // Calculate likelihood.
1458 if (a_mode == 11) {
1459 logpost -= 0.5 * (double)ni_test * log(yty);
1460 } else {
1461 logpost -= 0.5 * yty;
1462 }
1463
1464 logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
1465 ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
1466
1467 return logpost;
1468 }
1469
CalcPosterior(const gsl_matrix * Xgamma,const gsl_matrix * XtX,const gsl_vector * Xty,const double yty,const size_t s_size,gsl_vector * Xb,gsl_vector * beta,class HYPBSLMM & cHyp)1470 double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
1471 const gsl_vector *Xty, const double yty,
1472 const size_t s_size, gsl_vector *Xb,
1473 gsl_vector *beta, class HYPBSLMM &cHyp) {
1474 double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
1475 double logpost = 0.0;
1476 double d, P_yy = yty, logdet_O = 0.0;
1477
1478 gsl_matrix_const_view Xgamma_sub =
1479 gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size);
1480 gsl_matrix_const_view XtX_sub =
1481 gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size);
1482 gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size);
1483
1484 gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size);
1485 gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size);
1486 gsl_vector *beta_hat = gsl_vector_alloc(s_size);
1487 gsl_vector *Xty_temp = gsl_vector_alloc(s_size);
1488
1489 gsl_vector_memcpy(Xty_temp, &Xty_sub.vector);
1490
1491 // Calculate Omega.
1492 gsl_matrix_memcpy(Omega, &XtX_sub.matrix);
1493 gsl_matrix_scale(Omega, sigma_a2);
1494 gsl_matrix_set_identity(M_temp);
1495 gsl_matrix_add(Omega, M_temp);
1496
1497 // Calculate beta_hat.
1498 logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat);
1499 gsl_vector_scale(beta_hat, sigma_a2);
1500
1501 gsl_blas_ddot(Xty_temp, beta_hat, &d);
1502 P_yy -= d;
1503
1504 // Sample tau.
1505 double tau = 1.0;
1506 if (a_mode == 11) {
1507 tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
1508 }
1509
1510 // Sample beta.
1511 for (size_t i = 0; i < s_size; i++) {
1512 d = gsl_ran_gaussian(gsl_r, 1);
1513 gsl_vector_set(beta, i, d);
1514 }
1515 gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size);
1516 gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
1517 &beta_sub.vector);
1518
1519 // This computes inv(L^T(Omega)) %*% beta.
1520 gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau));
1521 gsl_vector_add(&beta_sub.vector, beta_hat);
1522 gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0,
1523 Xb);
1524
1525 // For quantitative traits, calculate pve and pge.
1526 if (a_mode == 11) {
1527 gsl_blas_ddot(Xb, Xb, &d);
1528 cHyp.pve = d / (double)ni_test;
1529 cHyp.pve /= cHyp.pve + 1.0 / tau;
1530 cHyp.pge = 1.0;
1531 }
1532
1533 logpost = -0.5 * logdet_O;
1534 if (a_mode == 11) {
1535 logpost -= 0.5 * (double)ni_test * log(P_yy);
1536 } else {
1537 logpost -= 0.5 * P_yy;
1538 }
1539
1540 logpost +=
1541 ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
1542 ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
1543
1544 gsl_matrix_free(Omega);
1545 gsl_matrix_free(M_temp);
1546 gsl_vector_free(beta_hat);
1547 gsl_vector_free(Xty_temp);
1548
1549 return logpost;
1550 }
1551
1552 // Calculate pve and pge, and calculate z_hat for case-control data.
CalcCC_PVEnZ(gsl_vector * z_hat,class HYPBSLMM & cHyp)1553 void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) {
1554 gsl_vector_set_zero(z_hat);
1555 cHyp.pve = 0.0;
1556 cHyp.pge = 1.0;
1557 return;
1558 }
1559
1560 // Calculate pve and pge, and calculate z_hat for case-control data.
CalcCC_PVEnZ(const gsl_vector * Xb,gsl_vector * z_hat,class HYPBSLMM & cHyp)1561 void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
1562 class HYPBSLMM &cHyp) {
1563 double d;
1564
1565 gsl_blas_ddot(Xb, Xb, &d);
1566 cHyp.pve = d / (double)ni_test;
1567 cHyp.pve /= cHyp.pve + 1.0;
1568 cHyp.pge = 1.0;
1569
1570 gsl_vector_memcpy(z_hat, Xb);
1571
1572 return;
1573 }
1574
1575 // If a_mode==13, then run probit model.
MCMC(const gsl_matrix * X,const gsl_vector * y)1576 void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) {
1577 clock_t time_start;
1578 double time_set = 0, time_post = 0;
1579
1580 class HYPBSLMM cHyp_old, cHyp_new;
1581
1582 gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
1583 gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
1584
1585 gsl_vector *Xb_new = gsl_vector_alloc(ni_test);
1586 gsl_vector *Xb_old = gsl_vector_alloc(ni_test);
1587 gsl_vector *z_hat = gsl_vector_alloc(ni_test);
1588 gsl_vector *z = gsl_vector_alloc(ni_test);
1589
1590 gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max);
1591 gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max);
1592 gsl_vector *Xtz_old = gsl_vector_alloc(s_max);
1593 gsl_vector *beta_old = gsl_vector_alloc(s_max);
1594
1595 gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max);
1596 gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max);
1597 gsl_vector *Xtz_new = gsl_vector_alloc(s_max);
1598 gsl_vector *beta_new = gsl_vector_alloc(s_max);
1599
1600 double ztz = 0.0;
1601 gsl_vector_memcpy(z, y);
1602
1603 // For quantitative traits, y is centered already in
1604 // gemma.cpp, but just in case.
1605 double mean_z = CenterVector(z);
1606 gsl_blas_ddot(z, z, &ztz);
1607
1608 double logPost_new, logPost_old;
1609 double logMHratio;
1610
1611 gsl_matrix_set_zero(Result_gamma);
1612 if (a_mode == 13) {
1613 pheno_mean = 0.0;
1614 }
1615
1616 vector<pair<double, double>> beta_g;
1617 for (size_t i = 0; i < ns_test; i++) {
1618 beta_g.push_back(make_pair(0.0, 0.0));
1619 }
1620
1621 vector<size_t> rank_new, rank_old;
1622 vector<pair<size_t, double>> pos_loglr;
1623
1624 time_start = clock();
1625 MatrixCalcLmLR(X, z, pos_loglr);
1626 time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1627
1628 stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
1629 for (size_t i = 0; i < ns_test; ++i) {
1630 mapRank2pos[i] = pos_loglr[i].first;
1631 }
1632
1633 // Calculate proposal distribution for gamma (unnormalized),
1634 double *p_gamma = new double[ns_test];
1635 CalcPgamma(p_gamma);
1636
1637 gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
1638
1639 // Initial parameters.
1640 InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr);
1641
1642 cHyp_initial = cHyp_old;
1643
1644 if (cHyp_old.n_gamma == 0) {
1645 logPost_old = CalcPosterior(ztz, cHyp_old);
1646 } else {
1647 SetXgamma(Xgamma_old, X, rank_old);
1648 CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
1649 logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz,
1650 rank_old.size(), Xb_old, beta_old, cHyp_old);
1651 }
1652
1653 // Calculate centered z_hat, and pve.
1654 if (a_mode == 13) {
1655 if (cHyp_old.n_gamma == 0) {
1656 CalcCC_PVEnZ(z_hat, cHyp_old);
1657 } else {
1658 CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
1659 }
1660 }
1661
1662 // Start MCMC.
1663 int accept;
1664 size_t total_step = w_step + s_step;
1665 size_t w = 0, w_col, pos;
1666 size_t repeat = 0;
1667
1668 for (size_t t = 0; t < total_step; ++t) {
1669 if (t % d_pace == 0 || t == total_step - 1) {
1670 ProgressBar("Running MCMC ", t, total_step - 1,
1671 (double)n_accept / (double)(t * n_mh + 1));
1672 }
1673
1674 if (a_mode == 13) {
1675 SampleZ(y, z_hat, z);
1676 mean_z = CenterVector(z);
1677 gsl_blas_ddot(z, z, &ztz);
1678
1679 // First proposal.
1680 if (cHyp_old.n_gamma == 0) {
1681 logPost_old = CalcPosterior(ztz, cHyp_old);
1682 } else {
1683 gsl_matrix_view Xold_sub =
1684 gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size());
1685 gsl_vector_view Xtz_sub =
1686 gsl_vector_subvector(Xtz_old, 0, rank_old.size());
1687 gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0,
1688 &Xtz_sub.vector);
1689 logPost_old =
1690 CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(),
1691 Xb_old, beta_old, cHyp_old);
1692 }
1693 }
1694
1695 // M-H steps.
1696 for (size_t i = 0; i < n_mh; ++i) {
1697 if (gsl_rng_uniform(gsl_r) < 0.33) {
1698 repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
1699 } else {
1700 repeat = 1;
1701 }
1702
1703 logMHratio = 0.0;
1704 logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
1705 logMHratio +=
1706 ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
1707 logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
1708
1709 if (cHyp_new.n_gamma == 0) {
1710 logPost_new = CalcPosterior(ztz, cHyp_new);
1711 } else {
1712
1713 // This makes sure that rank_old.size() ==
1714 // rank_remove.size() does not happen.
1715 if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) {
1716 time_start = clock();
1717 SetXgamma(Xgamma_new, X, rank_new);
1718 CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new);
1719 time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1720 } else {
1721 time_start = clock();
1722 SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new,
1723 Xgamma_new, XtX_new, Xtz_new);
1724 time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1725 }
1726 time_start = clock();
1727 logPost_new =
1728 CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(),
1729 Xb_new, beta_new, cHyp_new);
1730 time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1731 }
1732 logMHratio += logPost_new - logPost_old;
1733
1734 if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
1735 accept = 1;
1736 n_accept++;
1737 } else {
1738 accept = 0;
1739 }
1740
1741 if (accept == 1) {
1742 logPost_old = logPost_new;
1743 cHyp_old = cHyp_new;
1744 gsl_vector_memcpy(Xb_old, Xb_new);
1745
1746 rank_old.clear();
1747 if (rank_new.size() != 0) {
1748 for (size_t i = 0; i < rank_new.size(); ++i) {
1749 rank_old.push_back(rank_new[i]);
1750 }
1751
1752 gsl_matrix_view Xold_sub =
1753 gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
1754 gsl_matrix_view XtXold_sub = gsl_matrix_submatrix(
1755 XtX_old, 0, 0, rank_new.size(), rank_new.size());
1756 gsl_vector_view Xtzold_sub =
1757 gsl_vector_subvector(Xtz_old, 0, rank_new.size());
1758 gsl_vector_view betaold_sub =
1759 gsl_vector_subvector(beta_old, 0, rank_new.size());
1760
1761 gsl_matrix_view Xnew_sub =
1762 gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
1763 gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix(
1764 XtX_new, 0, 0, rank_new.size(), rank_new.size());
1765 gsl_vector_view Xtznew_sub =
1766 gsl_vector_subvector(Xtz_new, 0, rank_new.size());
1767 gsl_vector_view betanew_sub =
1768 gsl_vector_subvector(beta_new, 0, rank_new.size());
1769
1770 gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix);
1771 gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix);
1772 gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector);
1773 gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector);
1774 }
1775 } else {
1776 cHyp_new = cHyp_old;
1777 }
1778 }
1779
1780 // Calculate z_hat, and pve.
1781 if (a_mode == 13) {
1782 if (cHyp_old.n_gamma == 0) {
1783 CalcCC_PVEnZ(z_hat, cHyp_old);
1784 } else {
1785 CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
1786 }
1787
1788 // Sample mu and update z_hat.
1789 gsl_vector_sub(z, z_hat);
1790 mean_z += CenterVector(z);
1791 mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
1792
1793 gsl_vector_add_constant(z_hat, mean_z);
1794 }
1795
1796 // Save data.
1797 if (t < w_step) {
1798 continue;
1799 } else {
1800 if (t % r_pace == 0) {
1801 w_col = w % w_pace;
1802 if (w_col == 0) {
1803 if (w == 0) {
1804 WriteResult(0, Result_hyp, Result_gamma, w_col);
1805 } else {
1806 WriteResult(1, Result_hyp, Result_gamma, w_col);
1807 gsl_matrix_set_zero(Result_hyp);
1808 gsl_matrix_set_zero(Result_gamma);
1809 }
1810 }
1811
1812 gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
1813 gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
1814 gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
1815 gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
1816 gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
1817 gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
1818
1819 for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
1820 pos = mapRank2pos[rank_old[i]] + 1;
1821 gsl_matrix_set(Result_gamma, w_col, i, pos);
1822
1823 beta_g[pos - 1].first += gsl_vector_get(beta_old, i);
1824 beta_g[pos - 1].second += 1.0;
1825 }
1826
1827 if (a_mode == 13) {
1828 pheno_mean += mean_z;
1829 }
1830
1831 w++;
1832 }
1833 }
1834 }
1835 cout << endl;
1836
1837 cout << "time on selecting Xgamma: " << time_set << endl;
1838 cout << "time on calculating posterior: " << time_post << endl;
1839
1840 w_col = w % w_pace;
1841 WriteResult(1, Result_hyp, Result_gamma, w_col);
1842
1843 gsl_vector *alpha = gsl_vector_alloc(ns_test);
1844 gsl_vector_set_zero(alpha);
1845 WriteParam(beta_g, alpha, w);
1846 gsl_vector_free(alpha);
1847
1848 gsl_matrix_free(Result_hyp);
1849 gsl_matrix_free(Result_gamma);
1850
1851 gsl_vector_free(z_hat);
1852 gsl_vector_free(z);
1853 gsl_vector_free(Xb_new);
1854 gsl_vector_free(Xb_old);
1855
1856 gsl_matrix_free(Xgamma_old);
1857 gsl_matrix_free(XtX_old);
1858 gsl_vector_free(Xtz_old);
1859 gsl_vector_free(beta_old);
1860
1861 gsl_matrix_free(Xgamma_new);
1862 gsl_matrix_free(XtX_new);
1863 gsl_vector_free(Xtz_new);
1864 gsl_vector_free(beta_new);
1865
1866 delete[] p_gamma;
1867 beta_g.clear();
1868
1869 return;
1870 }
1871