1 /*
2     Genome-wide Efficient Mixed Model Association (GEMMA)
3     Copyright © 2011-2017, Xiang Zhou
4     Copyright © 2017, Peter Carbonetto
5     Copyright © 2017-2018 Pjotr Prins
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <fstream>
22 #include <iostream>
23 #include <sstream>
24 
25 #include <assert.h>
26 #include <bitset>
27 #include <cmath>
28 #include <cstring>
29 #include <iomanip>
30 #include <iostream>
31 #include <regex>
32 #include <stdio.h>
33 #include <stdlib.h>
34 
35 #include "gsl/gsl_blas.h"
36 #include "gsl/gsl_cdf.h"
37 #include "gsl/gsl_integration.h"
38 #include "gsl/gsl_linalg.h"
39 #include "gsl/gsl_matrix.h"
40 #include "gsl/gsl_min.h"
41 #include "gsl/gsl_roots.h"
42 #include "gsl/gsl_vector.h"
43 
44 #include "gzstream.h"
45 #include "gemma_io.h"
46 #include "fastblas.h"
47 #include "lapack.h"
48 #include "lmm.h"
49 #include "mathfunc.h"
50 
51 using namespace std;
52 
CopyFromParam(PARAM & cPar)53 void LMM::CopyFromParam(PARAM &cPar) {
54   a_mode = cPar.a_mode;
55   d_pace = cPar.d_pace;
56 
57   file_bfile = cPar.file_bfile;
58   file_geno = cPar.file_geno;
59   file_out = cPar.file_out;
60   path_out = cPar.path_out;
61   file_gene = cPar.file_gene;
62 
63   l_min = cPar.l_min;
64   l_max = cPar.l_max;
65   n_region = cPar.n_region;
66   l_mle_null = cPar.l_mle_null;
67   logl_mle_H0 = cPar.logl_mle_H0;
68 
69   time_UtX = 0.0;
70   time_opt = 0.0;
71 
72   ni_total = cPar.ni_total;
73   ns_total = cPar.ns_total;
74   ni_test = cPar.ni_test;
75   ns_test = cPar.ns_test;
76   n_cvt = cPar.n_cvt;
77 
78   ng_total = cPar.ng_total;
79   ng_test = 0;
80 
81   indicator_idv = cPar.indicator_idv;
82   indicator_snp = cPar.indicator_snp;
83   snpInfo = cPar.snpInfo;
84   setGWASnps = cPar.setGWASnps;
85 
86   return;
87 }
88 
CopyToParam(PARAM & cPar)89 void LMM::CopyToParam(PARAM &cPar) {
90   cPar.time_UtX = time_UtX;
91   cPar.time_opt = time_opt;
92 
93   cPar.ng_test = ng_test;
94 
95   return;
96 }
97 
WriteFiles()98 void LMM::WriteFiles() {
99   string file_str;
100   debug_msg("LMM::WriteFiles");
101 
102   file_str = path_out + "/" + file_out;
103   file_str += ".assoc.txt";
104 
105   ofstream outfile(file_str.c_str(), ofstream::out);
106   if (!outfile) {
107     cout << "error writing file: " << file_str.c_str() << endl;
108     return;
109   }
110 
111   auto common_header = [&] () {
112     if (a_mode != 2) {
113       outfile << "beta" << "\t";
114       outfile << "se" << "\t";
115     }
116 
117     if (!is_legacy_mode())
118       outfile << "logl_H1" << "\t";  // we may make this an option
119 
120     switch(a_mode) {
121     case 1:
122       outfile << "l_remle" << "\t"
123               << "p_wald" << endl;
124       break;
125     case 2:
126       outfile << "l_mle" << "\t"
127               << "p_lrt" << endl;
128       break;
129     case 3:
130       outfile << "p_score" << endl;
131       break;
132     case 4:
133       outfile << "l_remle" << "\t"
134               << "l_mle" << "\t"
135               << "p_wald" << "\t"
136               << "p_lrt" << "\t"
137               << "p_score" << endl;
138       break;
139     }
140   };
141 
142   auto sumstats = [&] (SUMSTAT st) {
143     outfile << scientific << setprecision(6);
144 
145     if (a_mode != 2) {
146       outfile << st.beta << "\t";
147       outfile << st.se << "\t";
148     }
149 
150     if (!is_legacy_mode())
151       outfile << st.logl_H1 << "\t";
152 
153     switch(a_mode) {
154     case 1:
155       outfile << st.lambda_remle << "\t"
156               << st.p_wald << endl;
157       break;
158     case 2:
159       outfile << st.lambda_mle << "\t"
160               << st.p_lrt << endl;
161       break;
162     case 3:
163       outfile << st.p_score << endl;
164       break;
165     case 4:
166       outfile << st.lambda_remle << "\t"
167               << st.lambda_mle << "\t"
168               << st.p_wald << "\t"
169               << st.p_lrt << "\t"
170               << st.p_score << endl;
171       break;
172     }
173   };
174 
175 
176   if (!file_gene.empty()) {
177     outfile << "geneID" << "\t";
178 
179     common_header();
180 
181     for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
182       outfile << snpInfo[t].rs_number << "\t";
183       sumstats(sumStat[t]);
184     }
185   } else {
186     bool process_gwasnps = setGWASnps.size();
187 
188     outfile << "chr" << "\t"
189             << "rs" << "\t"
190             << "ps" << "\t"
191             << "n_miss" << "\t"
192             << "allele1" << "\t"
193             << "allele0" << "\t"
194             << "af" << "\t";
195 
196     common_header();
197 
198     size_t t = 0;
199     for (size_t i = 0; i < snpInfo.size(); ++i) {
200       if (indicator_snp[i] == 0)
201         continue;
202       auto snp = snpInfo[i].rs_number;
203       if (process_gwasnps && setGWASnps.count(snp) == 0)
204         continue;
205       // cout << t << endl;
206       outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
207               << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
208               << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
209               << fixed << setprecision(3) << snpInfo[i].maf << "\t";
210 
211       sumstats(sumStat[t]);
212       t++;
213     }
214   }
215 
216   outfile.close();
217   outfile.clear();
218   return;
219 }
220 
221 /*
222    As explained in
223    https://github.com/genetics-statistics/GEMMA/issues/94 CalcPab
224    returns the Pab matrix. As described For pab, it stores all
225    variables in the form of v_a P_p v_b. (Similarly, ppab stores all
226    v_a P_p P_p v_b, while pppab stores all v_a P_p P_p P_p v_b. These
227    quantities are defined according to the page 6 of this
228    supplementary information
229    http://xzlab.org/papers/2012_Zhou&Stephens_NG_SI.pdf).
230 
231    In the code, p, a, b are indexes: when p=n_cvt+1, P_p is P_x as in
232    that supplementary information; when a=n_cvt+1, v_a=x; and when
233    a=n_cvt+2, v_a=y.
234 
235    e_mode determines which model the algorithm is fitting: when
236    e_mode==1, it computes all the above quantities for the alternative
237    model (with the random effects term); when e_mode==0, it compute
238    these quantities for the null model (without the random effects
239    term).  Note that e==0 is only used here.
240 
241    These quantities were computed based on the initial GEMMA paper,
242    and the goal is to finally compute y P_x y, y P_x P_xy,
243    y P_x P_x P_x y and the few trace forms (section 3.1.4 on page 5 of
244    the supplementary information). Sometimes I was wondering if we
245    should compute all these final quantities directly, instead of
246    through these complicated recursions. Direct computation may only
247    make computation a little slower, but will make the code much
248    easier to follow and easier to modify
249 
250    a typical call sends n_cvt a vector
251    Hi_eval, a vector ab and a matrix Uab.
252 
253   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
254 
255 (gdb) p Uab->size1
256 $1 = 247
257 (gdb) p n_cvt
258 $2 = 1
259 (gdb) p e_mode
260 $3 = 0
261 (gdb) p Uab->size2
262 $4 = 6
263 (gdb) p Hi_eval->size
264 $5 = 247
265 (gdb) p ab->size
266 $6 = 6
267 (gdb) p Pab->size1
268 $7 = 3
269 (gdb) p Pab->size2
270 $8 = 6
271 
272   Hi_eval [0..ind] x Uab [ind, n_index] x ab [n_index]
273 
274 Iterating through a dataset Hi_eval differs and Uab (last row)
275  */
276 
CalcPab(const size_t n_cvt,const size_t e_mode,const gsl_vector * Hi_eval,const gsl_matrix * Uab,const gsl_vector * unused,gsl_matrix * Pab)277 void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval,
278              const gsl_matrix *Uab, const gsl_vector *unused, gsl_matrix *Pab) {
279 
280 #if !defined NDEBUG
281   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // result size
282   auto ni_test = Uab->size1; // inds
283   assert(Uab->size1 == Hi_eval->size);
284   assert(Uab->size2 == n_index);
285 
286   assert(Pab->size1 == n_cvt+2);
287   assert(Pab->size2 == n_index);
288   assert(Hi_eval->size == ni_test);
289   // assert(ab->size == n_index);
290 #endif // DEBUG
291 
292   // compute Hi_eval (inds)  * Uab  (inds x n_index) * ab (n_index) and return in Pab (cvt x n_index).
293 
294   double p_ab = 0.0;
295 
296   write("CalcPab");
297   write(Hi_eval,"Hi_eval");
298   write(Uab,"Uab");
299   // write(ab,"ab");
300   if (is_check_mode())
301     assert(!has_nan(Hi_eval));
302   assert(!has_nan(Uab));
303   // assert(!has_nan(ab));
304 
305   for (size_t p = 0; p <= n_cvt + 1; ++p) {        // p walks rows phenotypes + covariates
306     for (size_t a = p + 1; a <= n_cvt + 2; ++a) {  // a walks cols in p+1..rest
307       for (size_t b = a; b <= n_cvt + 2; ++b) {    // b in a..rest
308         size_t index_ab = GetabIndex(a, b, n_cvt); // index in top half matrix, see above
309         if (p == 0) { // fills row 0 for each a,b using dot product of Hi_eval . Uab(a)
310           // cout << "p is 0 " << index_ab; // walk row 0
311           gsl_vector_const_view Uab_col = gsl_matrix_const_column(Uab, index_ab); // get the column
312           gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab); // dot product with H_eval
313           if (e_mode != 0) { // if not null model (defunct right now)
314             if (! is_legacy_mode()) assert(false); // disabling to see when it is used; allow with legacy mode
315             p_ab = gsl_vector_get(unused, index_ab) - p_ab; // was ab
316           }
317           // cout << p << "r, index_ab " << index_ab << ":" << p_ab << endl;
318           gsl_matrix_set(Pab, 0, index_ab, p_ab);
319           write(Pab,"Pab int");
320         } else {
321           // walk the rest of the upper triangle of the matrix (row 1..n). Cols jump with 2 at a time
322           // cout << "a" << a << "b" << b << "p" << p << "n_cvt" << n_cvt << endl;
323           write(Pab,"Pab int");
324           size_t index_aw = GetabIndex(a, p, n_cvt);
325           size_t index_bw = GetabIndex(b, p, n_cvt);
326           size_t index_ww = GetabIndex(p, p, n_cvt);
327 
328           // auto rows = Pab->size1; // n_cvt+2
329           double ps_ab = gsl_matrix_safe_get(Pab, p - 1, index_ab);
330           double ps_aw = gsl_matrix_safe_get(Pab, p - 1, index_aw);
331           double ps_bw = gsl_matrix_safe_get(Pab, p - 1, index_bw);
332           double ps_ww = gsl_matrix_safe_get(Pab, p - 1, index_ww);
333 
334           // cout << "unsafe " << p-1 << "," << index_ww << ":" << gsl_matrix_get(Pab,p-1,index_ww) << endl;
335           // if (is_check_mode() || is_debug_mode()) assert(ps_ww != 0.0);
336           if (ps_ww != 0)
337             p_ab = ps_ab - ps_aw * ps_bw / ps_ww;
338           else
339             p_ab = ps_ab;
340 
341           // cout << "set " << p << "r, index_ab " << index_ab << "c: " << p_ab << endl;
342           gsl_matrix_set(Pab, p, index_ab, p_ab);
343         }
344       }
345     }
346   }
347   write(Pab,"Pab");
348   // if (is_strict_mode() && (has_nan(Uab) || has_nan(Pab) || has_nan(Hi_eval)))
349   //   exit(2);
350   return;
351 }
352 
CalcPPab(const size_t n_cvt,const size_t e_mode,const gsl_vector * HiHi_eval,const gsl_matrix * Uab,const gsl_vector * unused_ab,const gsl_matrix * Pab,gsl_matrix * PPab)353 void CalcPPab(const size_t n_cvt, const size_t e_mode,
354               const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
355               const gsl_vector *unused_ab, const gsl_matrix *Pab, gsl_matrix *PPab) {
356   size_t index_ab, index_aw, index_bw, index_ww;
357   double p2_ab;
358   double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
359 
360   write("CalcPPab");
361   write(HiHi_eval,"Hi_eval");
362   write(Uab,"Uab");
363   // write(ab,"ab");
364 
365   for (size_t p = 0; p <= n_cvt + 1; ++p) {
366     for (size_t a = p + 1; a <= n_cvt + 2; ++a) {
367       for (size_t b = a; b <= n_cvt + 2; ++b) {
368         index_ab = GetabIndex(a, b, n_cvt);
369         if (p == 0) {
370           gsl_vector_const_view Uab_col =
371               gsl_matrix_const_column(Uab, index_ab);
372           gsl_blas_ddot(HiHi_eval, &Uab_col.vector, &p2_ab);
373           if (e_mode != 0) {
374             assert(false);
375             p2_ab = p2_ab - gsl_vector_get(unused_ab, index_ab) +
376                     2.0 * gsl_matrix_safe_get(Pab, 0, index_ab);
377           }
378           gsl_matrix_set(PPab, 0, index_ab, p2_ab);
379         } else {
380           index_aw = GetabIndex(a, p, n_cvt);
381           index_bw = GetabIndex(b, p, n_cvt);
382           index_ww = GetabIndex(p, p, n_cvt);
383 
384           ps2_ab = gsl_matrix_safe_get(PPab, p - 1, index_ab);
385           ps_aw = gsl_matrix_safe_get(Pab, p - 1, index_aw);
386           ps_bw = gsl_matrix_safe_get(Pab, p - 1, index_bw);
387           ps_ww = gsl_matrix_safe_get(Pab, p - 1, index_ww);
388           ps2_aw = gsl_matrix_safe_get(PPab, p - 1, index_aw);
389           ps2_bw = gsl_matrix_safe_get(PPab, p - 1, index_bw);
390           ps2_ww = gsl_matrix_safe_get(PPab, p - 1, index_ww);
391 
392           // if (is_check_mode() || is_debug_mode()) assert(ps_ww != 0.0);
393           if (ps_ww != 0) {
394             p2_ab = ps2_ab + ps_aw * ps_bw * ps2_ww / (ps_ww * ps_ww);
395             p2_ab -= (ps_aw * ps2_bw + ps_bw * ps2_aw) / ps_ww;
396           }
397           else {
398             p2_ab = ps2_ab;
399           }
400           gsl_matrix_set(PPab, p, index_ab, p2_ab);
401 
402         }
403       }
404     }
405   }
406   write(PPab,"PPab");
407   // if (is_strict_mode() && (has_nan(Uab) || has_nan(PPab) || has_nan(HiHi_eval)))
408   //   exit(2);
409   return;
410 }
411 
CalcPPPab(const size_t n_cvt,const size_t e_mode,const gsl_vector * HiHiHi_eval,const gsl_matrix * Uab,const gsl_vector * unused_ab,const gsl_matrix * Pab,const gsl_matrix * PPab,gsl_matrix * PPPab)412 void CalcPPPab(const size_t n_cvt, const size_t e_mode,
413                const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
414                const gsl_vector *unused_ab, const gsl_matrix *Pab,
415                const gsl_matrix *PPab, gsl_matrix *PPPab) {
416   size_t index_ab, index_aw, index_bw, index_ww;
417   double p3_ab;
418   double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw,
419       ps3_ww;
420   write("CalcPPPab");
421   write(HiHiHi_eval,"HiHiHi_eval");
422   write(Uab,"Uab");
423 
424   for (size_t p = 0; p <= n_cvt + 1; ++p) {
425     for (size_t a = p + 1; a <= n_cvt + 2; ++a) {
426       for (size_t b = a; b <= n_cvt + 2; ++b) {
427         index_ab = GetabIndex(a, b, n_cvt);
428         if (p == 0) {
429           gsl_vector_const_view Uab_col =
430               gsl_matrix_const_column(Uab, index_ab);
431           gsl_blas_ddot(HiHiHi_eval, &Uab_col.vector, &p3_ab);
432           if (e_mode != 0) {
433             assert(false);
434             p3_ab = gsl_vector_get(unused_ab, index_ab) - p3_ab +
435                     3.0 * gsl_matrix_get(PPab, 0, index_ab) -
436                     3.0 * gsl_matrix_get(Pab, 0, index_ab);
437           }
438           gsl_matrix_set(PPPab, 0, index_ab, p3_ab);
439         } else {
440           index_aw = GetabIndex(a, p, n_cvt);
441           index_bw = GetabIndex(b, p, n_cvt);
442           index_ww = GetabIndex(p, p, n_cvt);
443 
444           ps3_ab = gsl_matrix_safe_get(PPPab, p - 1, index_ab);
445           ps_aw = gsl_matrix_safe_get(Pab, p - 1, index_aw);
446           ps_bw = gsl_matrix_safe_get(Pab, p - 1, index_bw);
447           ps_ww = gsl_matrix_safe_get(Pab, p - 1, index_ww);
448           ps2_aw = gsl_matrix_safe_get(PPab, p - 1, index_aw);
449           ps2_bw = gsl_matrix_safe_get(PPab, p - 1, index_bw);
450           ps2_ww = gsl_matrix_safe_get(PPab, p - 1, index_ww);
451           ps3_aw = gsl_matrix_safe_get(PPPab, p - 1, index_aw);
452           ps3_bw = gsl_matrix_safe_get(PPPab, p - 1, index_bw);
453           ps3_ww = gsl_matrix_safe_get(PPPab, p - 1, index_ww);
454 
455           // if (is_check_mode() || is_debug_mode()) assert(ps_ww != 0.0);
456           if (ps_ww != 0) {
457             p3_ab = ps3_ab -
458               ps_aw * ps_bw * ps2_ww * ps2_ww / (ps_ww * ps_ww * ps_ww);
459             p3_ab -= (ps_aw * ps3_bw + ps_bw * ps3_aw + ps2_aw * ps2_bw) / ps_ww;
460             p3_ab += (ps_aw * ps2_bw * ps2_ww + ps_bw * ps2_aw * ps2_ww +
461                       ps_aw * ps_bw * ps3_ww) /
462               (ps_ww * ps_ww);
463           }
464           else {
465             p3_ab = ps3_ab;
466           }
467           gsl_matrix_set(PPPab, p, index_ab, p3_ab);
468         }
469       }
470     }
471   }
472   write(PPPab,"PPPab");
473   // if (is_strict_mode() && (has_nan(Uab) || has_nan(PPPab) || has_nan(HiHiHi_eval)))
474   //  exit(2);
475   return;
476 }
477 
LogL_f(double l,void * params)478 double LogL_f(double l, void *params) {
479   FUNC_PARAM *p = (FUNC_PARAM *)params;
480   size_t n_cvt = p->n_cvt;
481   size_t ni_test = p->ni_test;
482   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
483 
484   size_t nc_total;
485   if (p->calc_null == true) {
486     nc_total = n_cvt;
487   } else {
488     nc_total = n_cvt + 1;
489   }
490 
491   double f = 0.0, logdet_h = 0.0, d;
492   size_t index_yy;
493 
494   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
495   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
496   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
497 
498   gsl_vector_safe_memcpy(v_temp, p->eval);
499   gsl_vector_scale(v_temp, l);
500   if (p->e_mode == 0) {
501     gsl_vector_set_all(Hi_eval, 1.0);
502   } else {
503     gsl_vector_safe_memcpy(Hi_eval, v_temp);
504   }
505   gsl_vector_add_constant(v_temp, 1.0);
506   gsl_vector_div(Hi_eval, v_temp);
507 
508   for (size_t i = 0; i < (p->eval)->size; ++i) {
509     d = gsl_vector_get(v_temp, i);
510     logdet_h += safe_log(fabs(d));
511   }
512 
513   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
514 
515   double c =
516       0.5 * (double)ni_test * (safe_log((double)ni_test) - safe_log(2 * M_PI) - 1.0);
517 
518   index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
519   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_yy);
520 
521   if (is_check_mode() || is_debug_mode()) {
522     // cerr << "P_yy is" << P_yy << endl;
523     assert(!is_nan(P_yy));
524     assert(P_yy > 0);
525   }
526   f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * safe_log(P_yy);
527   if (is_check_mode() || is_debug_mode()) {
528     assert(!is_nan(f));
529   }
530   gsl_matrix_free(Pab); // FIXME
531   gsl_vector_safe_free(Hi_eval);
532   gsl_vector_safe_free(v_temp);
533   return f;
534 }
535 
LogL_dev1(double l,void * params)536 double LogL_dev1(double l, void *params) {
537   FUNC_PARAM *p = (FUNC_PARAM *)params;
538   size_t n_cvt = p->n_cvt;
539   size_t ni_test = p->ni_test;
540   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // represents top half of covariate matrix
541 
542   size_t nc_total;
543   if (p->calc_null == true) {
544     nc_total = n_cvt;
545   } else {
546     nc_total = n_cvt + 1;
547   }
548 
549   double dev1 = 0.0, trace_Hi = 0.0;
550   size_t index_yy;
551 
552   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
553   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
554   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
555   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
556   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
557 
558   gsl_vector_safe_memcpy(v_temp, p->eval);
559   gsl_vector_scale(v_temp, l);
560   if (p->e_mode == 0) {
561     gsl_vector_set_all(Hi_eval, 1.0);
562   } else {
563     gsl_vector_safe_memcpy(Hi_eval, v_temp);
564   }
565   gsl_vector_add_constant(v_temp, 1.0);
566   gsl_vector_div(Hi_eval, v_temp);
567 
568   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
569   gsl_vector_mul(HiHi_eval, Hi_eval);
570 
571   gsl_vector_set_all(v_temp, 1.0);
572   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
573 
574   if (p->e_mode != 0) {
575     trace_Hi = (double)ni_test - trace_Hi;
576   }
577 
578   /*
579 (gdb) p Uab->size1
580 $1 = 247
581 (gdb) p n_cvt
582 $2 = 1
583 (gdb) p e_mode
584 $3 = 0
585 (gdb) p Uab->size2
586 $4 = 6
587 (gdb) p Hi_eval->size
588 $5 = 247
589 (gdb) p ab->size
590 $6 = 6
591 (gdb) p Pab->size1
592 $7 = 3
593 (gdb) p Pab->size2
594 $8 = 6
595   */
596 
597 #if !defined NDEBUG
598   auto Uab = p->Uab;
599   auto ab = p->ab;
600   assert(n_index == (n_cvt + 2 + 1) * (n_cvt + 2) / 2);
601   assert(Uab->size1 == ni_test);
602   assert(Uab->size2 == n_index); // n_cvt == 1 -> n_index == 6?
603 
604   assert(Pab->size1 == n_cvt+2);
605   assert(Pab->size2 == n_index);
606 
607   assert(ab->size == n_index);
608 
609   assert(p->e_mode == 0);
610   assert(Hi_eval->size == ni_test);
611 #endif // DEBUG
612 
613   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
614   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
615 
616   double trace_HiK = ((double)ni_test - trace_Hi) / l;
617 
618   index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
619 
620   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_yy);
621   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_yy);
622   double yPKPy = (P_yy - PP_yy) / l;
623   dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy;
624 
625   gsl_matrix_free(Pab);   // FIXME: may contain NaN
626   gsl_matrix_free(PPab);  // FIXME: may contain NaN
627   gsl_vector_safe_free(Hi_eval);
628   gsl_vector_safe_free(HiHi_eval);
629   gsl_vector_safe_free(v_temp);
630 
631   return dev1;
632 }
633 
LogL_dev2(double l,void * params)634 double LogL_dev2(double l, void *params) {
635   FUNC_PARAM *p = (FUNC_PARAM *)params;
636   size_t n_cvt = p->n_cvt;
637   size_t ni_test = p->ni_test;
638   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
639 
640   size_t nc_total;
641   if (p->calc_null == true) {
642     nc_total = n_cvt;
643   } else {
644     nc_total = n_cvt + 1;
645   }
646 
647   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
648   size_t index_yy;
649 
650   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
651   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
652   gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
653   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
654   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
655   gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
656   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
657 
658   gsl_vector_safe_memcpy(v_temp, p->eval);
659   gsl_vector_scale(v_temp, l);
660   if (p->e_mode == 0) {
661     gsl_vector_set_all(Hi_eval, 1.0);
662   } else {
663     gsl_vector_safe_memcpy(Hi_eval, v_temp);
664   }
665   gsl_vector_add_constant(v_temp, 1.0);
666   gsl_vector_div(Hi_eval, v_temp);
667 
668   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
669   gsl_vector_mul(HiHi_eval, Hi_eval);
670   gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
671   gsl_vector_mul(HiHiHi_eval, Hi_eval);
672 
673   gsl_vector_set_all(v_temp, 1.0);
674   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
675   gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
676 
677   if (p->e_mode != 0) {
678     trace_Hi = (double)ni_test - trace_Hi;
679     trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test;
680   }
681 
682   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
683   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
684   CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
685 
686   double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l);
687 
688   index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
689   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_yy);
690   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_yy);
691   double PPP_yy = gsl_matrix_safe_get(PPPab, nc_total, index_yy);
692 
693   double yPKPy = (P_yy - PP_yy) / l;
694   double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
695 
696   dev2 = 0.5 * trace_HiKHiK -
697          0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
698              (P_yy * P_yy);
699 
700   gsl_matrix_free(Pab);  // FIXME
701   gsl_matrix_free(PPab);
702   gsl_matrix_free(PPPab);
703   gsl_vector_safe_free(Hi_eval);
704   gsl_vector_safe_free(HiHi_eval);
705   gsl_vector_safe_free(HiHiHi_eval);
706   gsl_vector_safe_free(v_temp);
707 
708   return dev2;
709 }
710 
LogL_dev12(double l,void * params,double * dev1,double * dev2)711 void LogL_dev12(double l, void *params, double *dev1, double *dev2) {
712   FUNC_PARAM *p = (FUNC_PARAM *)params;
713   size_t n_cvt = p->n_cvt;
714   size_t ni_test = p->ni_test;
715   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
716 
717   size_t nc_total;
718   if (p->calc_null == true) {
719     nc_total = n_cvt;
720   } else {
721     nc_total = n_cvt + 1;
722   }
723 
724   double trace_Hi = 0.0, trace_HiHi = 0.0;
725   size_t index_yy;
726 
727   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
728   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
729   gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
730   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
731   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
732   gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
733   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
734 
735   gsl_vector_safe_memcpy(v_temp, p->eval);
736   gsl_vector_scale(v_temp, l);
737   if (p->e_mode == 0) {
738     gsl_vector_set_all(Hi_eval, 1.0);
739   } else {
740     gsl_vector_safe_memcpy(Hi_eval, v_temp);
741   }
742   gsl_vector_add_constant(v_temp, 1.0);
743   gsl_vector_div(Hi_eval, v_temp);
744 
745   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
746   gsl_vector_mul(HiHi_eval, Hi_eval);
747   gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
748   gsl_vector_mul(HiHiHi_eval, Hi_eval);
749 
750   gsl_vector_set_all(v_temp, 1.0);
751   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
752   gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
753 
754   if (p->e_mode != 0) {
755     trace_Hi = (double)ni_test - trace_Hi;
756     trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test;
757   }
758 
759   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
760   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
761   CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
762 
763   double trace_HiK = ((double)ni_test - trace_Hi) / l;
764   double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l);
765 
766   index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
767 
768   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_yy);
769   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_yy);
770   double PPP_yy = gsl_matrix_safe_get(PPPab, nc_total, index_yy);
771 
772   double yPKPy = (P_yy - PP_yy) / l;
773   double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
774 
775   *dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy;
776   *dev2 = 0.5 * trace_HiKHiK -
777           0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
778               (P_yy * P_yy);
779 
780   gsl_matrix_free(Pab);   // FIXME: may contain NaN
781   gsl_matrix_free(PPab);  // FIXME: may contain NaN
782   gsl_matrix_free(PPPab); // FIXME: may contain NaN
783   gsl_vector_safe_free(Hi_eval);
784   gsl_vector_safe_free(HiHi_eval);
785   gsl_vector_safe_free(HiHiHi_eval);
786   gsl_vector_safe_free(v_temp);
787 
788   return;
789 }
790 
LogRL_f(double l,void * params)791 double LogRL_f(double l, void *params) {
792   FUNC_PARAM *p = (FUNC_PARAM *)params;
793   size_t n_cvt = p->n_cvt;
794   size_t ni_test = p->ni_test;
795   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
796 
797   double df;
798   size_t nc_total;
799   if (p->calc_null == true) {
800     nc_total = n_cvt;
801     df = (double)ni_test - (double)n_cvt;
802   } else {
803     nc_total = n_cvt + 1;
804     df = (double)ni_test - (double)n_cvt - 1.0;
805   }
806 
807   double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
808   size_t index_ww;
809 
810   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
811   gsl_matrix *Iab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
812   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
813   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
814 
815   gsl_vector_safe_memcpy(v_temp, p->eval);
816   gsl_vector_scale(v_temp, l);
817   if (p->e_mode == 0) {
818     gsl_vector_set_all(Hi_eval, 1.0);
819   } else {
820     gsl_vector_safe_memcpy(Hi_eval, v_temp);
821   }
822   gsl_vector_add_constant(v_temp, 1.0);
823   gsl_vector_div(Hi_eval, v_temp);
824 
825   for (size_t i = 0; i < (p->eval)->size; ++i) {
826     d = gsl_vector_get(v_temp, i);
827     logdet_h += safe_log(fabs(d));
828   }
829 
830   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
831   gsl_vector_set_all(v_temp, 1.0);
832   CalcPab(n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
833 
834   // Calculate |WHiW|-|WW|.
835   logdet_hiw = 0.0;
836   for (size_t i = 0; i < nc_total; ++i) {
837     index_ww = GetabIndex(i + 1, i + 1, n_cvt);
838     d = gsl_matrix_safe_get(Pab, i, index_ww);
839     logdet_hiw += safe_log(d);
840     d = gsl_matrix_safe_get(Iab, i, index_ww);
841     logdet_hiw -= safe_log(d);
842   }
843   index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
844   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_ww);
845 
846   double c = 0.5 * df * (safe_log(df) - safe_log(2 * M_PI) - 1.0);
847   f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * safe_log(P_yy);
848 
849   gsl_matrix_free(Pab);
850   gsl_matrix_free(Iab); // contains NaN
851   gsl_vector_safe_free(Hi_eval);
852   gsl_vector_safe_free(v_temp);
853   return f;
854 }
855 
LogRL_dev1(double l,void * params)856 double LogRL_dev1(double l, void *params) {
857   FUNC_PARAM *p = (FUNC_PARAM *)params;
858   size_t n_cvt = p->n_cvt;
859   size_t ni_test = p->ni_test;
860   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
861 
862   double df;
863   size_t nc_total;
864   if (p->calc_null == true) {
865     nc_total = n_cvt;
866     df = (double)ni_test - (double)n_cvt;
867   } else {
868     nc_total = n_cvt + 1;
869     df = (double)ni_test - (double)n_cvt - 1.0;
870   }
871 
872   double dev1 = 0.0, trace_Hi = 0.0;
873   size_t index_ww;
874 
875   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
876   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
877   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
878   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
879   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
880 
881   // write(p->eval, "p->eval");
882   gsl_vector_safe_memcpy(v_temp, p->eval); // initialize with eval
883   gsl_vector_scale(v_temp, l);
884   if (p->e_mode == 0) {
885     gsl_vector_set_all(Hi_eval, 1.0);
886   } else {
887     gsl_vector_safe_memcpy(Hi_eval, v_temp);
888   }
889   gsl_vector_add_constant(v_temp, 1.0);
890   gsl_vector_div(Hi_eval, v_temp);
891 
892   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
893   gsl_vector_mul(HiHi_eval, Hi_eval);
894 
895   gsl_vector_set_all(v_temp, 1.0);
896   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
897 
898   if (p->e_mode != 0) {
899     trace_Hi = (double)ni_test - trace_Hi;
900   }
901 
902   write(p->eval, "p->eval2");
903   write(p->ab, "p->ab");
904   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
905   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
906 
907   // Calculate tracePK and trace PKPK.
908   double trace_P = trace_Hi;
909   double ps_ww, ps2_ww;
910   for (size_t i = 0; i < nc_total; ++i) {
911     index_ww = GetabIndex(i + 1, i + 1, n_cvt);
912     ps_ww = gsl_matrix_safe_get(Pab, i, index_ww);
913     ps2_ww = gsl_matrix_safe_get(PPab, i, index_ww);
914     trace_P -= ps2_ww / ps_ww;
915   }
916   double trace_PK = (df - trace_P) / l;
917 
918   // Calculate yPKPy, yPKPKPy.
919   index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
920   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_ww);
921   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_ww);
922   double yPKPy = (P_yy - PP_yy) / l;
923 
924   dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;
925 
926   gsl_matrix_free(Pab);  // FIXME: may contain NaN
927   gsl_matrix_free(PPab); // FIXME: may contain NaN
928   gsl_vector_safe_free(Hi_eval);
929   gsl_vector_safe_free(HiHi_eval);
930   gsl_vector_safe_free(v_temp);
931 
932   return dev1;
933 }
934 
LogRL_dev2(double l,void * params)935 double LogRL_dev2(double l, void *params) {
936   FUNC_PARAM *p = (FUNC_PARAM *)params;
937   size_t n_cvt = p->n_cvt;
938   size_t ni_test = p->ni_test;
939   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
940 
941   double df;
942   size_t nc_total;
943   if (p->calc_null == true) {
944     nc_total = n_cvt;
945     df = (double)ni_test - (double)n_cvt;
946   } else {
947     nc_total = n_cvt + 1;
948     df = (double)ni_test - (double)n_cvt - 1.0;
949   }
950 
951   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
952   size_t index_ww;
953 
954   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
955   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
956   gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
957   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
958   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
959   gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
960   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
961 
962   gsl_vector_safe_memcpy(v_temp, p->eval);
963   gsl_vector_scale(v_temp, l);
964   if (p->e_mode == 0) {
965     gsl_vector_set_all(Hi_eval, 1.0);
966   } else {
967     gsl_vector_safe_memcpy(Hi_eval, v_temp);
968   }
969   gsl_vector_add_constant(v_temp, 1.0);
970   gsl_vector_div(Hi_eval, v_temp);
971 
972   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
973   gsl_vector_mul(HiHi_eval, Hi_eval);
974   gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
975   gsl_vector_mul(HiHiHi_eval, Hi_eval);
976 
977   gsl_vector_set_all(v_temp, 1.0);
978   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
979   gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
980 
981   if (p->e_mode != 0) {
982     trace_Hi = (double)ni_test - trace_Hi;
983     trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test;
984   }
985 
986   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
987   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
988   CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
989 
990   // Calculate tracePK and trace PKPK.
991   double trace_P = trace_Hi, trace_PP = trace_HiHi;
992   double ps_ww, ps2_ww, ps3_ww;
993   for (size_t i = 0; i < nc_total; ++i) {
994     index_ww = GetabIndex(i + 1, i + 1, n_cvt);
995     ps_ww = gsl_matrix_safe_get(Pab, i, index_ww);
996     ps2_ww = gsl_matrix_safe_get(PPab, i, index_ww);
997     ps3_ww = gsl_matrix_safe_get(PPPab, i, index_ww);
998     trace_P -= ps2_ww / ps_ww;
999     trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
1000   }
1001   double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l);
1002 
1003   // Calculate yPKPy, yPKPKPy.
1004   index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
1005   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_ww);
1006   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_ww);
1007   double PPP_yy = gsl_matrix_safe_get(PPPab, nc_total, index_ww);
1008   double yPKPy = (P_yy - PP_yy) / l;
1009   double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
1010 
1011   dev2 = 0.5 * trace_PKPK -
1012          0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
1013 
1014   gsl_matrix_free(Pab);  // FIXME
1015   gsl_matrix_free(PPab);
1016   gsl_matrix_free(PPPab);
1017   gsl_vector_safe_free(Hi_eval);
1018   gsl_vector_safe_free(HiHi_eval);
1019   gsl_vector_safe_free(HiHiHi_eval);
1020   gsl_vector_safe_free(v_temp);
1021 
1022   return dev2;
1023 }
1024 
LogRL_dev12(double l,void * params,double * dev1,double * dev2)1025 void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {
1026   FUNC_PARAM *p = (FUNC_PARAM *)params;
1027   size_t n_cvt = p->n_cvt;
1028   size_t ni_test = p->ni_test;
1029   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1030 
1031   double df;
1032   size_t nc_total;
1033   if (p->calc_null == true) {
1034     nc_total = n_cvt;
1035     df = (double)ni_test - (double)n_cvt;
1036   } else {
1037     nc_total = n_cvt + 1;
1038     df = (double)ni_test - (double)n_cvt - 1.0;
1039   }
1040 
1041   double trace_Hi = 0.0, trace_HiHi = 0.0;
1042   size_t index_ww;
1043 
1044   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
1045   gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
1046   gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
1047   gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
1048   gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
1049   gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
1050   gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
1051 
1052   gsl_vector_safe_memcpy(v_temp, p->eval);
1053   gsl_vector_scale(v_temp, l);
1054   if (p->e_mode == 0) {
1055     gsl_vector_set_all(Hi_eval, 1.0);
1056   } else {
1057     gsl_vector_safe_memcpy(Hi_eval, v_temp);
1058   }
1059   gsl_vector_add_constant(v_temp, 1.0);
1060   gsl_vector_div(Hi_eval, v_temp);
1061 
1062   gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
1063   gsl_vector_mul(HiHi_eval, Hi_eval);
1064   gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
1065   gsl_vector_mul(HiHiHi_eval, Hi_eval);
1066 
1067   gsl_vector_set_all(v_temp, 1.0);
1068   gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
1069   gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
1070 
1071   if (p->e_mode != 0) {
1072     trace_Hi = (double)ni_test - trace_Hi;
1073     trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test;
1074   }
1075 
1076   CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
1077   CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
1078   CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
1079 
1080   // Calculate tracePK and trace PKPK.
1081   double trace_P = trace_Hi, trace_PP = trace_HiHi;
1082   double ps_ww, ps2_ww, ps3_ww;
1083   for (size_t i = 0; i < nc_total; ++i) {
1084     index_ww = GetabIndex(i + 1, i + 1, n_cvt);
1085     ps_ww = gsl_matrix_safe_get(Pab, i, index_ww);
1086     ps2_ww = gsl_matrix_safe_get(PPab, i, index_ww);
1087     ps3_ww = gsl_matrix_safe_get(PPPab, i, index_ww);
1088     trace_P -= ps2_ww / ps_ww;
1089     trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
1090   }
1091   double trace_PK = (df - trace_P) / l;
1092   double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l);
1093 
1094   // Calculate yPKPy, yPKPKPy.
1095   index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
1096   double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_ww);
1097   double PP_yy = gsl_matrix_safe_get(PPab, nc_total, index_ww);
1098   double PPP_yy = gsl_matrix_safe_get(PPPab, nc_total, index_ww);
1099   double yPKPy = (P_yy - PP_yy) / l;
1100   double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
1101 
1102   *dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;
1103   *dev2 = 0.5 * trace_PKPK -
1104           0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
1105 
1106   gsl_matrix_free(Pab);  // FIXME
1107   gsl_matrix_free(PPab);
1108   gsl_matrix_free(PPPab);
1109   gsl_vector_safe_free(Hi_eval);
1110   gsl_vector_safe_free(HiHi_eval);
1111   gsl_vector_safe_free(HiHiHi_eval);
1112   gsl_vector_safe_free(v_temp);
1113 
1114   return;
1115 }
1116 
CalcRLWald(const double & l,const FUNC_PARAM & params,double & beta,double & se,double & p_wald)1117 void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
1118                      double &se, double &p_wald) {
1119   size_t n_cvt = params.n_cvt;
1120   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1121 
1122   int df = (int)ni_test - (int)n_cvt - 1;
1123 
1124   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
1125   gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
1126   gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
1127 
1128   gsl_vector_safe_memcpy(v_temp, params.eval);
1129   gsl_vector_scale(v_temp, l);
1130   if (params.e_mode == 0) {
1131     gsl_vector_set_all(Hi_eval, 1.0);
1132   } else {
1133     gsl_vector_safe_memcpy(Hi_eval, v_temp);
1134   }
1135   gsl_vector_add_constant(v_temp, 1.0);
1136   gsl_vector_div(Hi_eval, v_temp);
1137 
1138   CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
1139 
1140   size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
1141   size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
1142   size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
1143   double P_yy = gsl_matrix_safe_get(Pab, n_cvt, index_yy);
1144   double P_xx = gsl_matrix_safe_get(Pab, n_cvt, index_xx);
1145   double P_xy = gsl_matrix_safe_get(Pab, n_cvt, index_xy);
1146   double Px_yy = gsl_matrix_safe_get(Pab, n_cvt + 1, index_yy);
1147 
1148   beta = P_xy / P_xx;
1149   double tau = (double)df / Px_yy;
1150   se = safe_sqrt(1.0 / (tau * P_xx));
1151   p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df);
1152 
1153   gsl_matrix_free(Pab);
1154   gsl_vector_safe_free(Hi_eval);
1155   gsl_vector_safe_free(v_temp);
1156   return;
1157 }
1158 
CalcRLScore(const double & l,const FUNC_PARAM & params,double & beta,double & se,double & p_score)1159 void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
1160                       double &se, double &p_score) {
1161   size_t n_cvt = params.n_cvt;
1162   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1163 
1164   int df = (int)ni_test - (int)n_cvt - 1;
1165 
1166   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
1167   gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
1168   gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
1169 
1170   gsl_vector_safe_memcpy(v_temp, params.eval);
1171   gsl_vector_scale(v_temp, l);
1172   if (params.e_mode == 0) {
1173     gsl_vector_set_all(Hi_eval, 1.0);
1174   } else {
1175     gsl_vector_safe_memcpy(Hi_eval, v_temp);
1176   }
1177   gsl_vector_add_constant(v_temp, 1.0);
1178   gsl_vector_div(Hi_eval, v_temp);
1179 
1180   CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
1181 
1182   size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
1183   size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
1184   size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
1185   double P_yy = gsl_matrix_safe_get(Pab, n_cvt, index_yy);
1186   double P_xx = gsl_matrix_safe_get(Pab, n_cvt, index_xx);
1187   double P_xy = gsl_matrix_safe_get(Pab, n_cvt, index_xy);
1188   double Px_yy = gsl_matrix_safe_get(Pab, n_cvt + 1, index_yy);
1189 
1190   beta = P_xy / P_xx;
1191   double tau = (double)df / Px_yy;
1192   se = safe_sqrt(1.0 / (tau * P_xx));
1193 
1194   p_score =
1195       gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df);
1196 
1197   gsl_matrix_free(Pab);
1198   gsl_vector_safe_free(Hi_eval);
1199   gsl_vector_safe_free(v_temp);
1200   return;
1201 }
1202 
CalcUab(const gsl_matrix * UtW,const gsl_vector * Uty,gsl_matrix * Uab)1203 void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
1204   size_t index_ab;
1205   size_t n_cvt = UtW->size2;
1206 
1207   // debug_msg("entering");
1208 
1209   gsl_vector *u_a = gsl_vector_safe_alloc(Uty->size);
1210 
1211   for (size_t a = 1; a <= n_cvt + 2; ++a) { // walk columns of pheno+cvt
1212     if (a == n_cvt + 1) {
1213       continue;
1214     }
1215 
1216     if (a == n_cvt + 2) {
1217       gsl_vector_safe_memcpy(u_a, Uty); // last column is phenotype
1218     } else {
1219       gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, a - 1);
1220       gsl_vector_safe_memcpy(u_a, &UtW_col.vector);
1221     }
1222 
1223     for (size_t b = a; b >= 1; --b) { // back fill other columns
1224       if (b == n_cvt + 1) {
1225         continue;
1226       }
1227 
1228       index_ab = GetabIndex(a, b, n_cvt);
1229       gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
1230 
1231       if (b == n_cvt + 2) {
1232         gsl_vector_safe_memcpy(&Uab_col.vector, Uty);
1233       } else {
1234         gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
1235         gsl_vector_safe_memcpy(&Uab_col.vector, &UtW_col.vector);
1236       }
1237 
1238       gsl_vector_mul(&Uab_col.vector, u_a);
1239     }
1240     // cout << "a" << a << endl;
1241     write(Uab,"Uab iteration");
1242   }
1243 
1244   gsl_vector_safe_free(u_a);
1245   return;
1246 }
1247 
CalcUab(const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_vector * Utx,gsl_matrix * Uab)1248 void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty,
1249              const gsl_vector *Utx, gsl_matrix *Uab) {
1250   size_t index_ab;
1251   size_t n_cvt = UtW->size2;
1252 
1253   for (size_t b = 1; b <= n_cvt + 2; ++b) {
1254     index_ab = GetabIndex(n_cvt + 1, b, n_cvt);
1255     gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
1256 
1257     if (b == n_cvt + 2) {
1258       gsl_vector_safe_memcpy(&Uab_col.vector, Uty);
1259     } else if (b == n_cvt + 1) {
1260       gsl_vector_safe_memcpy(&Uab_col.vector, Utx);
1261     } else {
1262       gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
1263       gsl_vector_safe_memcpy(&Uab_col.vector, &UtW_col.vector);
1264     }
1265 
1266     gsl_vector_mul(&Uab_col.vector, Utx);
1267   }
1268 
1269   return;
1270 }
1271 
Calcab(const gsl_matrix * W,const gsl_vector * y,gsl_vector * ab)1272 void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
1273   write(W,"W");
1274   write(y,"y");
1275 
1276   gsl_vector_set_zero(ab); // not sure, but emulates v95 behaviour
1277 
1278   size_t n_cvt = W->size2;
1279 
1280   gsl_vector *v_a = gsl_vector_safe_alloc(y->size);
1281   gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
1282 
1283   double d;
1284 
1285   for (size_t a = 1; a <= n_cvt + 2; ++a) {
1286     if (a == n_cvt + 1) {
1287       continue;
1288     }
1289 
1290     if (a == n_cvt + 2) {
1291       gsl_vector_safe_memcpy(v_a, y);
1292     } else {
1293       gsl_vector_const_view W_col = gsl_matrix_const_column(W, a - 1);
1294       gsl_vector_safe_memcpy(v_a, &W_col.vector);
1295     }
1296     write(v_a,"v_a");
1297 
1298     for (size_t b = a; b >= 1; --b) {
1299       if (b == n_cvt + 1) {
1300         continue;
1301       }
1302 
1303       auto index_ab = GetabIndex(a, b, n_cvt);
1304 
1305       if (b == n_cvt + 2) {
1306         gsl_vector_safe_memcpy(v_b, y);
1307       } else {
1308         gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
1309         gsl_vector_safe_memcpy(v_b, &W_col.vector);
1310       }
1311 
1312       write(v_b,"v_b");
1313       gsl_blas_ddot(v_a, v_b, &d);
1314       gsl_vector_set(ab, index_ab, d);
1315     }
1316   }
1317 
1318   write(ab,"ab");
1319 
1320   gsl_vector_safe_free(v_a);
1321   gsl_vector_safe_free(v_b);
1322   return;
1323 }
1324 
Calcab(const gsl_matrix * W,const gsl_vector * y,const gsl_vector * x,gsl_vector * ab)1325 void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
1326             gsl_vector *ab) {
1327   size_t index_ab;
1328   size_t n_cvt = W->size2;
1329 
1330   gsl_vector_set_zero(ab); // not sure, but emulates v95 behaviour
1331 
1332   double d;
1333   gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
1334 
1335   for (size_t b = 1; b <= n_cvt + 2; ++b) {
1336     index_ab = GetabIndex(n_cvt + 1, b, n_cvt);
1337 
1338     if (b == n_cvt + 2) {
1339       gsl_vector_safe_memcpy(v_b, y);
1340     } else if (b == n_cvt + 1) {
1341       gsl_vector_safe_memcpy(v_b, x);
1342     } else {
1343       gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
1344       gsl_vector_safe_memcpy(v_b, &W_col.vector);
1345     }
1346 
1347     gsl_blas_ddot(x, v_b, &d);
1348     gsl_vector_set(ab, index_ab, d);
1349   }
1350 
1351   gsl_vector_safe_free(v_b);
1352   return;
1353 }
1354 
AnalyzeGene(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Utx,const gsl_matrix * W,const gsl_vector * x)1355 void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
1356                       const gsl_matrix *UtW, const gsl_vector *Utx,
1357                       const gsl_matrix *W, const gsl_vector *x) {
1358   debug_msg(file_gene);
1359   igzstream infile(file_gene.c_str(), igzstream::in);
1360   if (!infile) {
1361     cout << "error reading gene expression file:" << file_gene << endl;
1362     return;
1363   }
1364 
1365   clock_t time_start = clock();
1366 
1367   string line;
1368   char *ch_ptr;
1369 
1370   double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
1371   double p_lrt = 0, p_score = 0;
1372   double logl_H1 = 0.0, logl_H0 = 0.0, l_H0;
1373   int c_phen;
1374   string rs; // Gene id.
1375   double d;
1376 
1377   // Calculate basic quantities.
1378   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1379 
1380   gsl_vector *y = gsl_vector_safe_alloc(U->size1);
1381   gsl_vector *Uty = gsl_vector_safe_alloc(U->size2);
1382   gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
1383   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
1384 
1385   // Header.
1386   getline(infile, line);
1387 
1388   for (size_t t = 0; t < ng_total; t++) {
1389     safeGetline(infile, line).eof();
1390     if (t % d_pace == 0 || t == ng_total - 1) {
1391       ProgressBar("Performing Analysis", t, ng_total - 1);
1392     }
1393     ch_ptr = strtok_safe2((char *)line.c_str(), " , \t",file_gene.c_str());
1394     rs = ch_ptr;
1395 
1396     c_phen = 0;
1397     for (size_t i = 0; i < indicator_idv.size(); ++i) {
1398       ch_ptr = strtok_safe2(NULL, " , \t",file_gene.c_str());
1399       if (indicator_idv[i] == 0) {
1400         continue;
1401       }
1402 
1403       d = atof(ch_ptr);
1404       gsl_vector_set(y, c_phen, d);
1405 
1406       c_phen++;
1407     }
1408 
1409     time_start = clock();
1410     gsl_blas_dgemv(CblasTrans, 1.0, U, y, 0.0, Uty);
1411     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1412 
1413     // Calculate null.
1414     time_start = clock();
1415 
1416     gsl_matrix_set_zero(Uab);
1417 
1418     CalcUab(UtW, Uty, Uab);
1419     FUNC_PARAM param0 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
1420 
1421     if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
1422       CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
1423     }
1424 
1425     // Calculate alternative.
1426     CalcUab(UtW, Uty, Utx, Uab);
1427     FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
1428 
1429     // 3 is before 1.
1430     if (a_mode == 3 || a_mode == 4) {
1431       CalcRLScore(l_H0, param1, beta, se, p_score);
1432     }
1433 
1434     if (a_mode == 1 || a_mode == 4) {
1435       CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
1436       CalcRLWald(lambda_remle, param1, beta, se, p_wald);
1437     }
1438 
1439     if (a_mode == 2 || a_mode == 4) {
1440       CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
1441       p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
1442     }
1443 
1444     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1445 
1446     // Store summary data.
1447     SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
1448     sumStat.push_back(SNPs);
1449   }
1450   cout << endl;
1451 
1452   gsl_vector_safe_free(y);
1453   gsl_vector_safe_free(Uty);
1454   gsl_matrix_safe_free(Uab);
1455   gsl_vector_free(ab); // unused
1456 
1457   infile.close();
1458   infile.clear();
1459 
1460   return;
1461 }
1462 
1463 
Analyze(std::function<SnpNameValues (size_t)> & fetch_snp,const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_matrix * W,const gsl_vector * y,const set<string> gwasnps)1464 void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
1465                   const gsl_matrix *U, const gsl_vector *eval,
1466                   const gsl_matrix *UtW, const gsl_vector *Uty,
1467                   const gsl_matrix *W, const gsl_vector *y,
1468                   const set<string> gwasnps) {
1469   clock_t time_start = clock();
1470 
1471   // Subset/LOCO support
1472   bool process_gwasnps = gwasnps.size();
1473   if (process_gwasnps)
1474     debug_msg("Analyze subset of SNPs (LOCO)");
1475 
1476   // Calculate basic quantities.
1477   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1478 
1479   const size_t inds = U->size1;
1480   enforce(inds == ni_test);
1481   gsl_vector *x = gsl_vector_safe_alloc(inds); // #inds
1482   gsl_vector *x_miss = gsl_vector_safe_alloc(inds);
1483   gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
1484   gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
1485   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
1486 
1487   // Create a large matrix with LMM_BATCH_SIZE columns for batched processing
1488   // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE);
1489   const size_t msize = LMM_BATCH_SIZE;
1490   gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize);
1491   gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize);
1492   enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
1493   enforce(Xlarge->size1 == inds);
1494   gsl_matrix_set_zero(Xlarge);
1495   gsl_matrix_set_zero(Uab);
1496   CalcUab(UtW, Uty, Uab);
1497 
1498   // start reading genotypes and analyze
1499   size_t c = 0;
1500 
1501   auto batch_compute = [&](size_t l) { // using a C++ closure
1502     // Compute SNPs in batch, note the computations are independent per SNP
1503     // debug_msg("enter batch_compute");
1504     gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
1505     gsl_matrix_view UtXlarge_sub =
1506         gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
1507 
1508     time_start = clock();
1509     fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
1510                    &UtXlarge_sub.matrix);
1511     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1512 
1513     gsl_matrix_set_zero(Xlarge);
1514     for (size_t i = 0; i < l; i++) {
1515       // for every batch...
1516       gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
1517       gsl_vector_safe_memcpy(Utx, &UtXlarge_col.vector);
1518 
1519       CalcUab(UtW, Uty, Utx, Uab);
1520 
1521       time_start = clock();
1522       FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
1523 
1524       double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0;
1525       double p_lrt = 0.0, p_score = 0.0;
1526       double logl_H1 = 0.0;
1527 
1528       // 3 is before 1.
1529       if (a_mode == 3 || a_mode == 4) {
1530         CalcRLScore(l_mle_null, param1, beta, se, p_score);
1531       }
1532 
1533       if (a_mode == 1 || a_mode == 4) {
1534         // for univariate a_mode is 1
1535         CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
1536         CalcRLWald(lambda_remle, param1, beta, se, p_wald);
1537       }
1538 
1539       if (a_mode == 2 || a_mode == 4) {
1540         CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
1541         p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
1542       }
1543 
1544       time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1545 
1546       // Store summary data.
1547       SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
1548                       p_wald, p_lrt, p_score, logl_H1};
1549       sumStat.push_back(SNPs);
1550     }
1551     // debug_msg("exit batch_compute");
1552   };
1553 
1554   const auto num_snps = indicator_snp.size();
1555   enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?");
1556   if (num_snps < 50) {
1557     cerr << num_snps << " SNPs" << endl;
1558     warning_msg("very few SNPs processed");
1559   }
1560   const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace);
1561 
1562   for (size_t t = 0; t < num_snps; ++t) {
1563     if (t % progress_step == 0 || t == (num_snps - 1)) {
1564       ProgressBar("Reading SNPs", t, num_snps - 1);
1565     }
1566     if (indicator_snp[t] == 0)
1567       continue;
1568 
1569     auto tup = fetch_snp(t);
1570     auto snp = get<0>(tup);
1571     auto gs = get<1>(tup);
1572 
1573     // check whether SNP is included in gwasnps (used by LOCO)
1574     if (process_gwasnps && gwasnps.count(snp) == 0)
1575       continue;
1576 
1577     // drop missing idv and plug mean values for missing geno
1578     double x_total = 0.0; // sum genotype values to compute x_mean
1579     uint pos = 0;         // position in target vector
1580     uint n_miss = 0;
1581     gsl_vector_set_zero(x_miss);
1582     for (size_t i = 0; i < ni_total; ++i) {
1583       // get the genotypes per individual and compute stats per SNP
1584       if (indicator_idv[i] == 0) // skip individual
1585         continue;
1586 
1587       double geno = gs[i];
1588       if (isnan(geno)) {
1589         gsl_vector_set(x_miss, pos, 1.0);
1590         n_miss++;
1591       } else {
1592         gsl_vector_set(x, pos, geno);
1593         x_total += geno;
1594       }
1595       pos++;
1596     }
1597     enforce(pos == ni_test);
1598 
1599     const double x_mean = x_total/(double)(ni_test - n_miss);
1600 
1601     // plug x_mean back into missing values
1602     for (size_t i = 0; i < ni_test; ++i) {
1603       if (gsl_vector_get(x_miss, i) == 1.0) {
1604         gsl_vector_set(x, i, x_mean);
1605       }
1606     }
1607 
1608     /* this is what below GxE does
1609     for (size_t i = 0; i < ni_test; ++i) {
1610       auto geno = gsl_vector_get(x, i);
1611       if (std::isnan(geno)) {
1612         gsl_vector_set(x, i, x_mean);
1613         geno = x_mean;
1614       }
1615       if (x_mean > 1.0) {
1616         gsl_vector_set(x, i, 2 - geno);
1617       }
1618     }
1619     */
1620     enforce(x->size == ni_test);
1621 
1622     // copy genotype values for SNP into Xlarge cache
1623     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
1624     gsl_vector_safe_memcpy(&Xlarge_col.vector, x);
1625     c++; // count SNPs going in
1626 
1627     if (c % msize == 0) {
1628       batch_compute(msize);
1629     }
1630   }
1631 
1632   batch_compute(c % msize);
1633   ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
1634   // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
1635   cout << endl;
1636 
1637   gsl_vector_safe_free(x);
1638   gsl_vector_safe_free(x_miss);
1639   gsl_vector_safe_free(Utx);
1640   gsl_matrix_safe_free(Uab);
1641   gsl_vector_free(ab); // unused
1642 
1643   gsl_matrix_safe_free(Xlarge);
1644   gsl_matrix_safe_free(UtXlarge);
1645 
1646 }
1647 
AnalyzeBimbam(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_matrix * W,const gsl_vector * y,const set<string> gwasnps)1648 void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
1649                         const gsl_matrix *UtW, const gsl_vector *Uty,
1650                         const gsl_matrix *W, const gsl_vector *y,
1651                         const set<string> gwasnps) {
1652   debug_msg(file_geno);
1653   auto infilen = file_geno.c_str();
1654 
1655   igzstream infile(infilen, igzstream::in);
1656   enforce_msg(infile, "error reading genotype file");
1657   size_t prev_line = 0;
1658 
1659   std::vector <double> gs;
1660   gs.resize(ni_total);
1661 
1662   // fetch_snp is a callback function for every SNP row
1663   std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
1664     string line;
1665     while (prev_line <= num) {
1666       // also read SNPs that were skipped
1667       safeGetline(infile, line);
1668       prev_line++;
1669     }
1670     char *ch_ptr = strtok_safe2((char *)line.c_str(), " , \t",infilen);
1671     // enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL
1672 
1673     auto snp = string(ch_ptr);
1674     ch_ptr = strtok_safe2(NULL, " , \t",infilen); // skip column
1675     ch_ptr = strtok_safe2(NULL, " , \t",infilen); // skip column
1676 
1677     gs.assign (ni_total,nan("")); // wipe values
1678 
1679     for (size_t i = 0; i < ni_total; ++i) {
1680       ch_ptr = strtok_safe2(NULL, " , \t",infilen);
1681       if (strcmp(ch_ptr, "NA") != 0) {
1682         gs[i] = atof(ch_ptr);
1683         if (is_strict_mode() && gs[i] == 0.0)
1684           enforce_is_float(std::string(ch_ptr)); // only allow for NA and valid numbers
1685       }
1686     }
1687     return std::make_tuple(snp,gs);
1688   };
1689 
1690   LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
1691 
1692   infile.close();
1693   infile.clear();
1694 }
1695 
1696 #include "eigenlib.h"
1697 
AnalyzePlink(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_matrix * W,const gsl_vector * y,const set<string> gwasnps)1698 void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
1699                        const gsl_matrix *UtW, const gsl_vector *Uty,
1700                        const gsl_matrix *W, const gsl_vector *y,
1701                        const set<string> gwasnps) {
1702   string file_bed = file_bfile + ".bed";
1703   debug_msg(file_bed);
1704 
1705   ifstream infile(file_bed.c_str(), ios::binary);
1706   enforce_msg(infile,"error reading genotype (.bed) file");
1707 
1708   clock_t time_start = clock();
1709 
1710   char ch[1];
1711   bitset<8> b;
1712 
1713   double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
1714   double p_lrt = 0, p_score = 0;
1715   double logl_H1 = 0.0;
1716   int n_bit, n_miss, ci_total, ci_test;
1717   double geno, x_mean;
1718 
1719   // Calculate basic quantities.
1720   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
1721 
1722   gsl_vector *x = gsl_vector_alloc(U->size1);
1723   gsl_vector *Utx = gsl_vector_alloc(U->size2);
1724   gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
1725   gsl_vector *ab = gsl_vector_alloc(n_index);
1726 
1727   // Create a large matrix.
1728   size_t msize = LMM_BATCH_SIZE;
1729   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
1730   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
1731   gsl_matrix_set_zero(Xlarge);
1732 
1733   gsl_matrix_set_zero(Uab);
1734   CalcUab(UtW, Uty, Uab);
1735 
1736   // Calculate n_bit and c, the number of bit for each SNP.
1737   if (ni_total % 4 == 0) {
1738     n_bit = ni_total / 4;
1739   } else {
1740     n_bit = ni_total / 4 + 1;
1741   }
1742 
1743   // Print the first three magic numbers.
1744   for (int i = 0; i < 3; ++i) {
1745     infile.read(ch, 1);
1746     b = ch[0];
1747   }
1748 
1749   size_t c = 0, t_last = 0;
1750   for (size_t t = 0; t < snpInfo.size(); ++t) {
1751     if (indicator_snp[t] == 0)
1752       continue;
1753     t_last++;
1754   }
1755   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
1756     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
1757       ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
1758     }
1759     if (indicator_snp[t] == 0) {
1760       continue;
1761     }
1762 
1763     // n_bit, and 3 is the number of magic numbers.
1764     infile.seekg(t * n_bit + 3);
1765 
1766     // Read genotypes.
1767     x_mean = 0.0;
1768     n_miss = 0;
1769     ci_total = 0;
1770     ci_test = 0;
1771     for (int i = 0; i < n_bit; ++i) {
1772       infile.read(ch, 1);
1773       b = ch[0];
1774 
1775       // Minor allele homozygous: 2.0; major: 0.0.
1776       for (size_t j = 0; j < 4; ++j) {
1777         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
1778           break;
1779         }
1780         if (indicator_idv[ci_total] == 0) {
1781           ci_total++;
1782           continue;
1783         }
1784 
1785         if (b[2 * j] == 0) {
1786           if (b[2 * j + 1] == 0) {
1787             gsl_vector_set(x, ci_test, 2);
1788             x_mean += 2.0;
1789           } else {
1790             gsl_vector_set(x, ci_test, 1);
1791             x_mean += 1.0;
1792           }
1793         } else {
1794           if (b[2 * j + 1] == 1) {
1795             gsl_vector_set(x, ci_test, 0);
1796           } else {
1797             gsl_vector_set(x, ci_test, -9);
1798             n_miss++;
1799           }
1800         }
1801 
1802         ci_total++;
1803         ci_test++;
1804       }
1805     }
1806 
1807     x_mean /= (double)(ni_test - n_miss);
1808 
1809     for (size_t i = 0; i < ni_test; ++i) {
1810       geno = gsl_vector_get(x, i);
1811       if (geno == -9) {
1812         gsl_vector_set(x, i, x_mean);
1813         geno = x_mean;
1814       }
1815     }
1816 
1817     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
1818     gsl_vector_memcpy(&Xlarge_col.vector, x);
1819     c++;
1820 
1821     if (c % msize == 0 || c == t_last) {
1822       size_t l = 0;
1823       if (c % msize == 0) {
1824         l = msize;
1825       } else {
1826         l = c % msize;
1827       }
1828 
1829       gsl_matrix_view Xlarge_sub =
1830           gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
1831       gsl_matrix_view UtXlarge_sub =
1832           gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
1833 
1834       time_start = clock();
1835       fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
1836                      &UtXlarge_sub.matrix);
1837       time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1838 
1839       gsl_matrix_set_zero(Xlarge);
1840 
1841       for (size_t i = 0; i < l; i++) {
1842         gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
1843         gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
1844 
1845         CalcUab(UtW, Uty, Utx, Uab);
1846 
1847         time_start = clock();
1848         FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
1849 
1850         // 3 is before 1, for beta.
1851         if (a_mode == 3 || a_mode == 4) {
1852           CalcRLScore(l_mle_null, param1, beta, se, p_score);
1853         }
1854 
1855         if (a_mode == 1 || a_mode == 4) {
1856           CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
1857                      logl_H1);
1858           CalcRLWald(lambda_remle, param1, beta, se, p_wald);
1859         }
1860 
1861         if (a_mode == 2 || a_mode == 4) {
1862           CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
1863           p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
1864         }
1865 
1866         time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1867 
1868         // Store summary data.
1869         SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
1870                         p_wald, p_lrt, p_score, logl_H1};
1871         sumStat.push_back(SNPs);
1872       }
1873     }
1874   }
1875   cout << endl;
1876 
1877   gsl_vector_free(x);
1878   gsl_vector_free(Utx);
1879   gsl_matrix_free(Uab);
1880   gsl_vector_free(ab);
1881 
1882   gsl_matrix_free(Xlarge);
1883   gsl_matrix_free(UtXlarge);
1884 
1885   infile.close();
1886   infile.clear();
1887 }
1888 
MatrixCalcLR(const gsl_matrix * U,const gsl_matrix * UtX,const gsl_vector * Uty,const gsl_vector * K_eval,const double l_min,const double l_max,const size_t n_region,vector<pair<size_t,double>> & pos_loglr)1889 void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
1890                   const gsl_vector *Uty, const gsl_vector *K_eval,
1891                   const double l_min, const double l_max, const size_t n_region,
1892                   vector<pair<size_t, double>> &pos_loglr) {
1893   double logl_H0, logl_H1, log_lr, lambda0, lambda1;
1894 
1895   gsl_vector *w = gsl_vector_safe_alloc(Uty->size);
1896   gsl_matrix *Utw = gsl_matrix_safe_alloc(Uty->size, 1);
1897   gsl_matrix *Uab = gsl_matrix_safe_alloc(Uty->size, 6);
1898   gsl_vector *ab = gsl_vector_safe_alloc(6);
1899 
1900   gsl_vector_set_zero(ab);
1901   gsl_vector_set_all(w, 1.0);
1902   gsl_vector_view Utw_col = gsl_matrix_column(Utw, 0);
1903   gsl_blas_dgemv(CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
1904 
1905   CalcUab(Utw, Uty, Uab);
1906   FUNC_PARAM param0 = {true, Uty->size, 1, K_eval, Uab, ab, 0};
1907 
1908   CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
1909 
1910   for (size_t i = 0; i < UtX->size2; ++i) {
1911     gsl_vector_const_view UtX_col = gsl_matrix_const_column(UtX, i);
1912     CalcUab(Utw, Uty, &UtX_col.vector, Uab);
1913     FUNC_PARAM param1 = {false, UtX->size1, 1, K_eval, Uab, ab, 0};
1914 
1915     CalcLambda('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
1916     log_lr = logl_H1 - logl_H0;
1917 
1918     pos_loglr.push_back(make_pair(i, log_lr));
1919   }
1920 
1921   gsl_vector_safe_free(w);
1922   gsl_matrix_safe_free(Utw);
1923   gsl_matrix_safe_free(Uab);
1924   gsl_vector_free(ab); // unused
1925 
1926   return;
1927 }
1928 
CalcLambda(const char func_name,FUNC_PARAM & params,const double l_min,const double l_max,const size_t n_region,double & lambda,double & logf)1929 void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
1930                 const double l_max, const size_t n_region, double &lambda,
1931                 double &logf) {
1932   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
1933       func_name != 'l') {
1934     cout << "func_name only takes 'R' or 'L': 'R' for "
1935          << "log-restricted likelihood, 'L' for log-likelihood." << endl;
1936     return;
1937   }
1938 
1939   vector<pair<double, double>> lambda_lh;
1940 
1941   // Evaluate first-order derivates in different intervals.
1942   double lambda_l, lambda_h,
1943       lambda_interval = safe_log(l_max / l_min) / (double)n_region;
1944   double dev1_l, dev1_h, logf_l, logf_h;
1945 
1946   for (size_t i = 0; i < n_region; ++i) {
1947     lambda_l = l_min * exp(lambda_interval * i);
1948     lambda_h = l_min * exp(lambda_interval * (i + 1.0));
1949 
1950     if (func_name == 'R' || func_name == 'r') { // log-restricted likelihood
1951       dev1_l = LogRL_dev1(lambda_l, &params);
1952       dev1_h = LogRL_dev1(lambda_h, &params);
1953     } else {
1954       dev1_l = LogL_dev1(lambda_l, &params);
1955       dev1_h = LogL_dev1(lambda_h, &params);
1956     }
1957 
1958     if (dev1_l * dev1_h <= 0) {
1959       lambda_lh.push_back(make_pair(lambda_l, lambda_h));
1960     }
1961   }
1962 
1963   // If derivates do not change signs in any interval.
1964   if (lambda_lh.empty()) {
1965     if (func_name == 'R' || func_name == 'r') {
1966       logf_l = LogRL_f(l_min, &params);
1967       logf_h = LogRL_f(l_max, &params);
1968     } else {
1969       logf_l = LogL_f(l_min, &params);
1970       logf_h = LogL_f(l_max, &params);
1971     }
1972 
1973     if (logf_l >= logf_h) {
1974       lambda = l_min;
1975       logf = logf_l;
1976     } else {
1977       lambda = l_max;
1978       logf = logf_h;
1979     }
1980   } else {
1981 
1982     // If derivates change signs.
1983     int status;
1984     int iter = 0, max_iter = 100;
1985     double l, l_temp;
1986 
1987     gsl_function F;
1988     gsl_function_fdf FDF;
1989 
1990     F.params = &params;
1991     FDF.params = &params;
1992 
1993     if (func_name == 'R' || func_name == 'r') {
1994       F.function = &LogRL_dev1;
1995       FDF.f = &LogRL_dev1;
1996       FDF.df = &LogRL_dev2;
1997       FDF.fdf = &LogRL_dev12;
1998     } else {
1999       F.function = &LogL_dev1;
2000       FDF.f = &LogL_dev1;
2001       FDF.df = &LogL_dev2;
2002       FDF.fdf = &LogL_dev12;
2003     }
2004 
2005     const gsl_root_fsolver_type *T_f;
2006     gsl_root_fsolver *s_f;
2007     T_f = gsl_root_fsolver_brent;
2008     s_f = gsl_root_fsolver_alloc(T_f);
2009 
2010     const gsl_root_fdfsolver_type *T_fdf;
2011     gsl_root_fdfsolver *s_fdf;
2012     T_fdf = gsl_root_fdfsolver_newton;
2013     s_fdf = gsl_root_fdfsolver_alloc(T_fdf);
2014 
2015     for (vector<double>::size_type i = 0; i < lambda_lh.size(); ++i) {
2016       lambda_l = lambda_lh[i].first;
2017       lambda_h = lambda_lh[i].second;
2018       gsl_root_fsolver_set(s_f, &F, lambda_l, lambda_h);
2019 
2020       do {
2021         iter++;
2022         status = gsl_root_fsolver_iterate(s_f);
2023         l = gsl_root_fsolver_root(s_f);
2024         lambda_l = gsl_root_fsolver_x_lower(s_f);
2025         lambda_h = gsl_root_fsolver_x_upper(s_f);
2026         status = gsl_root_test_interval(lambda_l, lambda_h, 0, 1e-1);
2027       } while (status == GSL_CONTINUE && iter < max_iter);
2028 
2029       iter = 0;
2030 
2031       gsl_root_fdfsolver_set(s_fdf, &FDF, l);
2032 
2033       do {
2034         iter++;
2035         status = gsl_root_fdfsolver_iterate(s_fdf);
2036         l_temp = l;
2037         l = gsl_root_fdfsolver_root(s_fdf);
2038         status = gsl_root_test_delta(l, l_temp, 0, 1e-5);
2039       } while (status == GSL_CONTINUE && iter < max_iter && l > l_min &&
2040                l < l_max);
2041 
2042       l = l_temp;
2043       if (l < l_min) {
2044         l = l_min;
2045       }
2046       if (l > l_max) {
2047         l = l_max;
2048       }
2049       if (func_name == 'R' || func_name == 'r') {
2050         logf_l = LogRL_f(l, &params);
2051       } else {
2052         logf_l = LogL_f(l, &params);
2053       }
2054 
2055       if (i == 0) {
2056         logf = logf_l;
2057         lambda = l;
2058       } else if (logf < logf_l) {
2059         logf = logf_l;
2060         lambda = l;
2061       } else {
2062       }
2063     }
2064     gsl_root_fsolver_free(s_f);
2065     gsl_root_fdfsolver_free(s_fdf);
2066 
2067     if (func_name == 'R' || func_name == 'r') {
2068       logf_l = LogRL_f(l_min, &params);
2069       logf_h = LogRL_f(l_max, &params);
2070     } else {
2071       logf_l = LogL_f(l_min, &params);
2072       logf_h = LogL_f(l_max, &params);
2073     }
2074 
2075     if (logf_l > logf) {
2076       lambda = l_min;
2077       logf = logf_l;
2078     }
2079     if (logf_h > logf) {
2080       lambda = l_max;
2081       logf = logf_h;
2082     }
2083   }
2084 
2085   return;
2086 }
2087 
2088 // Calculate lambda in the null model.
CalcLambda(const char func_name,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const double l_min,const double l_max,const size_t n_region,double & lambda,double & logl_H0)2089 void CalcLambda(const char func_name, const gsl_vector *eval,
2090                 const gsl_matrix *UtW, const gsl_vector *Uty,
2091                 const double l_min, const double l_max, const size_t n_region,
2092                 double &lambda, double &logl_H0) {
2093 
2094   write(eval,"eval6");
2095 
2096   if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
2097       func_name != 'l') {
2098     cout << "func_name only takes 'R' or 'L': 'R' for "
2099          << "log-restricted likelihood, 'L' for log-likelihood." << endl;
2100     return;
2101   }
2102 
2103   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
2104   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
2105 
2106   // cout << "n_cvt " << n_cvt << ", ni_test " << ni_test << ", n_index " << n_index << endl;
2107 
2108   gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
2109   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
2110 
2111   gsl_matrix_set_zero(Uab);
2112   write(UtW,"UtW");
2113   write(UtW,"Uty");
2114   CalcUab(UtW, Uty, Uab);
2115   write(Uab,"Uab");
2116   Calcab(UtW, Uty, ab);
2117 
2118   FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
2119 
2120   CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
2121 
2122   gsl_matrix_safe_free(Uab);
2123   gsl_vector_free(ab); // unused
2124 
2125   return;
2126 }
2127 
2128 // Obtain REMLE estimate for PVE using lambda_remle.
CalcPve(const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const double lambda,const double trace_G,double & pve,double & pve_se)2129 void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
2130              const gsl_vector *Uty, const double lambda, const double trace_G,
2131              double &pve, double &pve_se) {
2132   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
2133   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
2134 
2135   gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
2136   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
2137 
2138   gsl_matrix_set_zero(Uab);
2139   CalcUab(UtW, Uty, Uab);
2140 
2141   FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
2142 
2143   double se = safe_sqrt(-1.0 / LogRL_dev2(lambda, &param0));
2144 
2145   pve = trace_G * lambda / (trace_G * lambda + 1.0);
2146   pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se;
2147 
2148   gsl_matrix_safe_free(Uab);
2149   gsl_vector_free(ab); // unused
2150   return;
2151 }
2152 
2153 // Obtain REML estimate for Vg and Ve using lambda_remle.
2154 // Obtain beta and se(beta) for coefficients.
2155 // ab is not used when e_mode==0.
CalcLmmVgVeBeta(const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const double lambda,double & vg,double & ve,gsl_vector * beta,gsl_vector * se_beta)2156 void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
2157                      const gsl_vector *Uty, const double lambda, double &vg,
2158                      double &ve, gsl_vector *beta, gsl_vector *se_beta) {
2159   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
2160   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
2161 
2162   gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
2163   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
2164   gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
2165   gsl_vector *Hi_eval = gsl_vector_safe_alloc(eval->size);
2166   gsl_vector *v_temp = gsl_vector_safe_alloc(eval->size);
2167   gsl_matrix *HiW = gsl_matrix_safe_alloc(eval->size, UtW->size2);
2168   gsl_matrix *WHiW = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
2169   gsl_vector *WHiy = gsl_vector_safe_alloc(UtW->size2);
2170   gsl_matrix *Vbeta = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
2171 
2172   gsl_matrix_set_zero(Uab);
2173   CalcUab(UtW, Uty, Uab);
2174 
2175   gsl_vector_safe_memcpy(v_temp, eval);
2176   gsl_vector_scale(v_temp, lambda);
2177   gsl_vector_set_all(Hi_eval, 1.0);
2178   gsl_vector_add_constant(v_temp, 1.0);
2179   gsl_vector_div(Hi_eval, v_temp);
2180 
2181   // Calculate beta.
2182   gsl_matrix_safe_memcpy(HiW, UtW);
2183   for (size_t i = 0; i < UtW->size2; i++) {
2184     gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
2185     gsl_vector_mul(&HiW_col.vector, Hi_eval);
2186   }
2187   fast_dgemm("T", "N", 1.0, HiW, UtW, 0.0, WHiW);
2188   gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
2189 
2190   int sig;
2191   gsl_permutation *pmt = gsl_permutation_alloc(UtW->size2);
2192   LUDecomp(WHiW, pmt, &sig);
2193   LUSolve(WHiW, pmt, WHiy, beta);
2194   LUInvert(WHiW, pmt, Vbeta);
2195 
2196   // Calculate vg and ve.
2197   CalcPab(n_cvt, 0, Hi_eval, Uab, ab, Pab);
2198 
2199   size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
2200   double P_yy = gsl_matrix_safe_get(Pab, n_cvt, index_yy);
2201 
2202   ve = P_yy / (double)(ni_test - n_cvt);
2203   vg = ve * lambda;
2204 
2205   // With ve, calculate se(beta).
2206   gsl_matrix_scale(Vbeta, ve);
2207 
2208   // Obtain se_beta.
2209   for (size_t i = 0; i < Vbeta->size1; i++) {
2210     gsl_vector_set(se_beta, i, safe_sqrt(gsl_matrix_get(Vbeta, i, i)));
2211   }
2212 
2213   gsl_matrix_safe_free(Uab);
2214   gsl_matrix_free(Pab);
2215   gsl_vector_free(ab); // ab is unused
2216   gsl_vector_safe_free(Hi_eval);
2217   gsl_vector_safe_free(v_temp);
2218   gsl_matrix_safe_free(HiW);
2219   gsl_matrix_safe_free(WHiW);
2220   gsl_vector_safe_free(WHiy);
2221   gsl_matrix_safe_free(Vbeta);
2222 
2223   gsl_permutation_free(pmt);
2224   return;
2225 }
2226 
AnalyzeBimbamGXE(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_matrix * W,const gsl_vector * y,const gsl_vector * env)2227 void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
2228                            const gsl_matrix *UtW, const gsl_vector *Uty,
2229                            const gsl_matrix *W, const gsl_vector *y,
2230                            const gsl_vector *env) {
2231   debug_msg("entering");
2232   auto infilen = file_gene.c_str();
2233   igzstream infile(infilen, igzstream::in);
2234   if (!infile) {
2235     cout << "error reading genotype file:" << file_geno << endl;
2236     return;
2237   }
2238 
2239   clock_t time_start = clock();
2240 
2241   string line;
2242   char *ch_ptr;
2243 
2244   double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
2245   double p_lrt = 0, p_score = 0;
2246   double logl_H1 = 0.0, logl_H0 = 0.0;
2247   int n_miss, c_phen;
2248   double geno, x_mean;
2249 
2250   // Calculate basic quantities.
2251   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
2252 
2253   gsl_vector *x = gsl_vector_safe_alloc(U->size1);
2254   gsl_vector *x_miss = gsl_vector_safe_alloc(U->size1);
2255   gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
2256   gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
2257   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
2258 
2259   gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
2260   gsl_matrix_view UtW_expand_mat =
2261       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
2262   gsl_matrix_safe_memcpy(&UtW_expand_mat.matrix, UtW);
2263   gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
2264   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
2265   gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
2266 
2267   // Start reading genotypes and analyze.
2268   for (size_t t = 0; t < indicator_snp.size(); ++t) {
2269     safeGetline(infile, line).eof();
2270     if (t % d_pace == 0 || t == (ns_total - 1)) {
2271       ProgressBar("Reading SNPs", t, ns_total - 1);
2272     }
2273     if (indicator_snp[t] == 0) {
2274       continue;
2275     }
2276 
2277     ch_ptr = strtok_safe2((char *)line.c_str(), " , \t",infilen);
2278     ch_ptr = strtok_safe2(NULL, " , \t",infilen);
2279     ch_ptr = strtok_safe2(NULL, " , \t",infilen);
2280 
2281     x_mean = 0.0;
2282     c_phen = 0;
2283     n_miss = 0;
2284     gsl_vector_set_zero(x_miss);
2285     for (size_t i = 0; i < ni_total; ++i) {
2286       ch_ptr = strtok_safe2(NULL, " , \t",infilen);
2287       if (indicator_idv[i] == 0) {
2288         continue;
2289       }
2290 
2291       if (strcmp(ch_ptr, "NA") == 0) {
2292         gsl_vector_set(x_miss, c_phen, 0.0);
2293         n_miss++;
2294       } else {
2295         geno = atof(ch_ptr);
2296 
2297         gsl_vector_set(x, c_phen, geno);
2298         gsl_vector_set(x_miss, c_phen, 1.0);
2299         x_mean += geno;
2300       }
2301       c_phen++;
2302     }
2303 
2304     x_mean /= (double)(ni_test - n_miss);
2305 
2306     for (size_t i = 0; i < ni_test; ++i) {
2307       if (gsl_vector_get(x_miss, i) == 0) {
2308         gsl_vector_set(x, i, x_mean);
2309       }
2310       geno = gsl_vector_get(x, i);
2311       if (x_mean > 1) {
2312         gsl_vector_set(x, i, 2 - geno);
2313       }
2314     }
2315 
2316     // Calculate statistics.
2317     time_start = clock();
2318     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
2319     gsl_vector_mul(x, env);
2320     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
2321     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2322 
2323     gsl_matrix_set_zero(Uab);
2324     CalcUab(UtW_expand, Uty, Uab);
2325 
2326     if (a_mode == 2 || a_mode == 4) {
2327       FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
2328       CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
2329     }
2330 
2331     CalcUab(UtW_expand, Uty, Utx, Uab);
2332 
2333     time_start = clock();
2334     FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
2335 
2336     // 3 is before 1.
2337     if (a_mode == 3 || a_mode == 4) {
2338       CalcRLScore(l_mle_null, param1, beta, se, p_score);
2339     }
2340 
2341     if (a_mode == 1 || a_mode == 4) {
2342       CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
2343       CalcRLWald(lambda_remle, param1, beta, se, p_wald);
2344     }
2345 
2346     if (a_mode == 2 || a_mode == 4) {
2347       CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
2348       p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
2349     }
2350 
2351     if (x_mean > 1) {
2352       beta *= -1;
2353     }
2354 
2355     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2356 
2357     // Store summary data.
2358     SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
2359     sumStat.push_back(SNPs);
2360   }
2361   cout << endl;
2362 
2363   gsl_vector_safe_free(x);
2364   gsl_vector_safe_free(x_miss);
2365   gsl_vector_safe_free(Utx);
2366   gsl_matrix_safe_free(Uab);
2367   gsl_vector_free(ab); // unused
2368 
2369   gsl_matrix_safe_free(UtW_expand);
2370 
2371   infile.close();
2372   infile.clear();
2373 
2374   return;
2375 }
2376 
AnalyzePlinkGXE(const gsl_matrix * U,const gsl_vector * eval,const gsl_matrix * UtW,const gsl_vector * Uty,const gsl_matrix * W,const gsl_vector * y,const gsl_vector * env)2377 void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
2378                           const gsl_matrix *UtW, const gsl_vector *Uty,
2379                           const gsl_matrix *W, const gsl_vector *y,
2380                           const gsl_vector *env) {
2381   string file_bed = file_bfile + ".bed";
2382   debug_msg(file_bed);
2383   ifstream infile(file_bed.c_str(), ios::binary);
2384   if (!infile) {
2385     cout << "error reading bed file:" << file_bed << endl;
2386     return;
2387   }
2388 
2389   clock_t time_start = clock();
2390 
2391   char ch[1];
2392   bitset<8> b;
2393 
2394   double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
2395   double p_lrt = 0, p_score = 0;
2396   double logl_H1 = 0.0, logl_H0 = 0.0;
2397   int n_bit, n_miss, ci_total, ci_test;
2398   double geno, x_mean;
2399 
2400   // Calculate basic quantities.
2401   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
2402 
2403   gsl_vector *x = gsl_vector_safe_alloc(U->size1);
2404   gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
2405   gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
2406   gsl_vector *ab = gsl_vector_safe_alloc(n_index);
2407 
2408   gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
2409   gsl_matrix_view UtW_expand_mat =
2410       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
2411   gsl_matrix_safe_memcpy(&UtW_expand_mat.matrix, UtW);
2412   gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
2413   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
2414   gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
2415 
2416   // Calculate n_bit and c, the number of bit for each SNP.
2417   if (ni_total % 4 == 0) {
2418     n_bit = ni_total / 4;
2419   } else {
2420     n_bit = ni_total / 4 + 1;
2421   }
2422 
2423   // Print the first three magic numbers.
2424   for (int i = 0; i < 3; ++i) {
2425     infile.read(ch, 1);
2426     b = ch[0];
2427   }
2428 
2429   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
2430     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
2431       ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
2432     }
2433     if (indicator_snp[t] == 0) {
2434       continue;
2435     }
2436 
2437     // n_bit, and 3 is the number of magic numbers
2438     infile.seekg(t * n_bit + 3);
2439 
2440     // Read genotypes.
2441     x_mean = 0.0;
2442     n_miss = 0;
2443     ci_total = 0;
2444     ci_test = 0;
2445     for (int i = 0; i < n_bit; ++i) {
2446       infile.read(ch, 1);
2447       b = ch[0];
2448 
2449       // Minor allele homozygous: 2.0; major: 0.0.
2450       for (size_t j = 0; j < 4; ++j) {
2451         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
2452           break;
2453         }
2454         if (indicator_idv[ci_total] == 0) {
2455           ci_total++;
2456           continue;
2457         }
2458 
2459         if (b[2 * j] == 0) {
2460           if (b[2 * j + 1] == 0) {
2461             gsl_vector_set(x, ci_test, 2);
2462             x_mean += 2.0;
2463           } else {
2464             gsl_vector_set(x, ci_test, 1);
2465             x_mean += 1.0;
2466           }
2467         } else {
2468           if (b[2 * j + 1] == 1) {
2469             gsl_vector_set(x, ci_test, 0);
2470           } else {
2471             gsl_vector_set(x, ci_test, -9);
2472             n_miss++;
2473           }
2474         }
2475 
2476         ci_total++;
2477         ci_test++;
2478       }
2479     }
2480 
2481     x_mean /= (double)(ni_test - n_miss);
2482 
2483     for (size_t i = 0; i < ni_test; ++i) {
2484       geno = gsl_vector_get(x, i);
2485       if (geno == -9) {
2486         gsl_vector_set(x, i, x_mean);
2487         geno = x_mean;
2488       }
2489       if (x_mean > 1) {
2490         gsl_vector_set(x, i, 2 - geno);
2491       }
2492     }
2493 
2494     // Calculate statistics.
2495     time_start = clock();
2496     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
2497     gsl_vector_mul(x, env);
2498     gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
2499     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2500 
2501     gsl_matrix_set_zero(Uab);
2502     CalcUab(UtW_expand, Uty, Uab);
2503 
2504     if (a_mode == 2 || a_mode == 4) {
2505       FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
2506       CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
2507     }
2508 
2509     CalcUab(UtW_expand, Uty, Utx, Uab);
2510 
2511     time_start = clock();
2512     FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
2513 
2514     // 3 is before 1, for beta.
2515     if (a_mode == 3 || a_mode == 4) {
2516       CalcRLScore(l_mle_null, param1, beta, se, p_score);
2517     }
2518 
2519     if (a_mode == 1 || a_mode == 4) {
2520       CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
2521       CalcRLWald(lambda_remle, param1, beta, se, p_wald);
2522     }
2523 
2524     if (a_mode == 2 || a_mode == 4) {
2525       CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
2526       p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
2527     }
2528 
2529     if (x_mean > 1) {
2530       beta *= -1;
2531     }
2532 
2533     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2534 
2535     // Store summary data.
2536     SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
2537     sumStat.push_back(SNPs);
2538   }
2539   cout << endl;
2540 
2541   gsl_vector_safe_free(x);
2542   gsl_vector_safe_free(Utx);
2543   gsl_matrix_safe_free(Uab);
2544   gsl_vector_free(ab); // unused
2545 
2546   gsl_matrix_safe_free(UtW_expand);
2547 
2548   infile.close();
2549   infile.clear();
2550 
2551   return;
2552 }
2553