1 /*
2     Genome-wide Efficient Mixed Model Association (GEMMA)
3     Copyright © 2011-2017, Xiang Zhou
4     Copyright © 2017, Peter Carbonetto
5     Copyright © 2017-2018, Pjotr Prins
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <cmath>
22 #include <cstring>
23 #include <ctime>
24 #include <fstream>
25 #include <iostream>
26 #include <string>
27 #include <sys/stat.h>
28 #ifdef OPENBLAS
29 extern "C" {
30   // these functions are defined in cblas.h - but if we include that we
31   // conflicts with other BLAS includes (GSL)
32   int openblas_get_num_threads(void);
33   int openblas_get_parallel(void);
34   char* openblas_get_config(void);
35   char* openblas_get_corename(void);
36 }
37 #else
38 #pragma message "Not compiling with OPENBLAS"
39 #endif
40 
41 #include "gsl/gsl_blas.h"
42 #include "gsl/gsl_cdf.h"
43 #include "gsl/gsl_eigen.h"
44 #include "gsl/gsl_linalg.h"
45 #include "gsl/gsl_matrix.h"
46 #include "gsl/gsl_vector.h"
47 #include "gsl/gsl_version.h"
48 
49 #include "bslmm.h"
50 #include "bslmmdap.h"
51 #include <csignal> // for gsl_error_handler
52 #include "gemma.h"
53 #include "gemma_io.h"
54 #include "lapack.h"
55 #include "ldr.h"
56 #include "lm.h"
57 #include "lmm.h"
58 #include "mathfunc.h"
59 #include "mvlmm.h"
60 #include "prdt.h"
61 #include "varcov.h"
62 #include "vc.h"
63 #include "debug.h"
64 #include "version.h"
65 
66 using namespace std;
67 
GEMMA(void)68 GEMMA::GEMMA(void) : version(GEMMA_VERSION), date(GEMMA_DATE), year(GEMMA_YEAR) {}
69 
gemma_gsl_error_handler(const char * reason,const char * file,int line,int gsl_errno)70 void gemma_gsl_error_handler (const char * reason,
71                               const char * file,
72                               int line, int gsl_errno) {
73   cerr << "GSL ERROR: " << reason << " in " << file
74        << " at line " << line << " errno " << gsl_errno <<endl;
75   std::raise(SIGINT); // keep the stack trace for gdb
76 }
77 
78 //#if defined(OPENBLAS) && !defined(OPENBLAS_LEGACY)
79 //#include <openblas_config.h>
80 //#endif
81 
PrintHeader(void)82 void GEMMA::PrintHeader(void) {
83 
84   cout <<
85     "GEMMA " << version << " (" << date << ") by Xiang Zhou and team (C) 2012-" << year << endl;
86   return;
87 }
88 
PrintLicense(void)89 void GEMMA::PrintLicense(void) {
90   cout << endl;
91   cout << "The Software Is Distributed Under GNU General Public "
92        << "License, But May Also Require The Following Notifications." << endl;
93   cout << endl;
94 
95   cout << "Including Lapack Routines In The Software May Require"
96        << " The Following Notification:" << endl;
97   cout << "Copyright (c) 1992-2010 The University of Tennessee and "
98        << "The University of Tennessee Research Foundation.  All rights "
99        << "reserved." << endl;
100   cout << "Copyright (c) 2000-2010 The University of California "
101        << "Berkeley. All rights reserved." << endl;
102   cout << "Copyright (c) 2006-2010 The University of Colorado Denver. "
103        << "All rights reserved." << endl;
104   cout << endl;
105 
106   cout << "$COPYRIGHT$" << endl;
107   cout << "Additional copyrights may follow" << endl;
108   cout << "$HEADER$" << endl;
109   cout << "Redistribution and use in source and binary forms, with or "
110        << "without modification, are permitted provided that the following "
111        << " conditions are met:" << endl;
112   cout << "- Redistributions of source code must retain the above "
113        << "copyright notice, this list of conditions and the following "
114        << "disclaimer." << endl;
115   cout << "- Redistributions in binary form must reproduce the above "
116        << "copyright notice, this list of conditions and the following "
117        << "disclaimer listed in this license in the documentation and/or "
118        << "other materials provided with the distribution." << endl;
119   cout << "- Neither the name of the copyright holders nor the names "
120        << "of its contributors may be used to endorse or promote products "
121        << "derived from this software without specific prior written "
122        << "permission." << endl;
123   cout << "The copyright holders provide no reassurances that the "
124        << "source code provided does not infringe any patent, copyright, "
125        << "or any other "
126        << "intellectual property rights of third parties. "
127        << "The copyright holders disclaim any liability to any recipient "
128        << "for claims brought against "
129        << "recipient by any third party for infringement of that parties "
130        << "intellectual property rights. " << endl;
131   cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "
132        << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "
133        << "INCLUDING, BUT NOT "
134        << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "
135        << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "
136        << "SHALL THE COPYRIGHT "
137        << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "
138        << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
139        << "(INCLUDING, BUT NOT "
140        << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "
141        << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "
142        << "HOWEVER CAUSED AND ON ANY "
143        << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "
144        << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "
145        << "OUT OF THE USE "
146        << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "
147        << "SUCH DAMAGE." << endl;
148   cout << endl;
149 
150   return;
151 }
152 
PrintHelp(size_t option)153 void GEMMA::PrintHelp(size_t option) {
154 
155   if (option == 0) {
156     cout << endl;
157     cout << " type gemma -h [num] for detailed help" << endl;
158     cout << " options: " << endl;
159     cout << "  1: quick guide" << endl;
160     cout << "  2: file I/O related" << endl;
161     cout << "  3: SNP QC" << endl;
162     cout << "  4: calculate relatedness matrix" << endl;
163     cout << "  5: perform eigen decomposition" << endl;
164     cout << "  6: perform variance component estimation" << endl;
165     cout << "  7: fit a linear model" << endl;
166     cout << "  8: fit a linear mixed model" << endl;
167     cout << "  9: fit a multivariate linear mixed model" << endl;
168     cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
169     cout << " 11: obtain predicted values" << endl;
170     cout << " 12: calculate snp variance covariance" << endl;
171     cout << " 13: note" << endl;
172     cout << " 14: debug options" << endl;
173     cout << endl;
174   }
175 
176   if (option == 1) {
177     cout << " QUICK GUIDE" << endl;
178     cout << " to generate a relatedness matrix: " << endl;
179     cout << "         gemma -bfile [prefix] -gk [num] -o [prefix]" << endl;
180     cout << "         gemma -g [filename] -p [filename] -gk [num] -o [prefix]"
181          << endl;
182     cout << " to generate the S matrix: " << endl;
183     cout << "         gemma -bfile [prefix] -gs -o [prefix]" << endl;
184     cout << "         gemma -p [filename] -g [filename] -gs -o [prefix]"
185          << endl;
186     cout << "         gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"
187          << endl;
188     cout << "         gemma -p [filename] -g [filename] -cat [filename] -gs "
189             "-o [prefix]"
190          << endl;
191     cout << "         gemma -bfile [prefix] -sample [num] -gs -o [prefix]"
192          << endl;
193     cout << "         gemma -p [filename] -g [filename] -sample [num] -gs -o "
194             "[prefix]"
195          << endl;
196     cout << " to generate the q vector: " << endl;
197     cout << "         gemma -beta [filename] -gq -o [prefix]" << endl;
198     cout << "         gemma -beta [filename] -cat [filename] -gq -o [prefix]"
199          << endl;
200     cout << " to generate the ldsc weigthts: " << endl;
201     cout << "         gemma -beta [filename] -gw -o [prefix]" << endl;
202     cout << "         gemma -beta [filename] -cat [filename] -gw -o [prefix]"
203          << endl;
204     cout << " to perform eigen decomposition of the relatedness matrix: "
205          << endl;
206     cout << "         gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"
207          << endl;
208     cout << "         gemma -g [filename] -p [filename] -k [filename] -eigen "
209             "-o [prefix]"
210          << endl;
211     cout << " to estimate variance components: " << endl;
212     cout << "         gemma -bfile [prefix] -k [filename] -vc [num] -o "
213             "[prefix]"
214          << endl;
215     cout << "         gemma -p [filename] -k [filename] -vc [num] -o [prefix]"
216          << endl;
217     cout << "         gemma -bfile [prefix] -mk [filename] -vc [num] -o "
218             "[prefix]"
219          << endl;
220     cout
221          << "         gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"
222          << endl;
223     cout << "         gemma -beta [filename] -cor [filename] -vc [num] -o "
224             "[prefix]"
225          << endl;
226     cout << "         gemma -beta [filename] -cor [filename] -cat [filename] "
227             "-vc [num] -o [prefix]"
228          << endl;
229     cout << "         options for the above two commands: -crt -windowbp [num]"
230          << endl;
231     cout << "         gemma -mq [filename] -ms [filename] -mv [filename] -vc "
232             "[num] -o [prefix]"
233          << endl;
234     cout << "         or with summary statistics, replace bfile with mbfile, "
235             "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"
236          << endl;
237     cout << "         gemma -beta [filename] -bfile [filename] -cat "
238             "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"
239          << endl;
240     cout << "         gemma -beta [filename] -bfile [filename] -cat "
241             "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"
242          << endl;
243     cout << " to fit a linear mixed model: " << endl;
244     cout << "         gemma -bfile [prefix] -k [filename] -lmm [num] -o "
245             "[prefix]"
246          << endl;
247     cout << "         gemma -g [filename] -p [filename] -a [filename] -k "
248             "[filename] -lmm [num] -o [prefix]"
249          << endl;
250     cout << " to fit a linear mixed model to test g by e effects: " << endl;
251     cout << "         gemma -bfile [prefix] -gxe [filename] -k [filename] "
252             "-lmm [num] -o [prefix]"
253          << endl;
254     cout << "         gemma -g [filename] -p [filename] -a [filename] -gxe "
255             "[filename] -k [filename] -lmm [num] -o [prefix]"
256          << endl;
257     cout << " to fit a univariate linear mixed model with different residual "
258             "weights for different individuals: "
259          << endl;
260     cout << "         gemma -bfile [prefix] -weight [filename] -k [filename] "
261             "-lmm [num] -o [prefix]"
262          << endl;
263     cout << "         gemma -g [filename] -p [filename] -a [filename] "
264             "-weight [filename] -k [filename] -lmm [num] -o [prefix]"
265          << endl;
266     cout << " to fit a multivariate linear mixed model: " << endl;
267     cout << "         gemma -bfile [prefix] -k [filename] -lmm [num] -n "
268             "[pheno cols...] -o [prefix]"
269          << endl;
270     cout << "         gemma -g [filename] -p [filename] -a [filename] -k "
271             "[filename] -lmm [num] -n [pheno cols...] -o [prefix]"
272          << endl;
273     cout << " to fit a Bayesian sparse linear mixed model: " << endl;
274     cout << "         gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
275     cout << "         gemma -g [filename] -p [filename] -a [filename] -bslmm "
276             "[num] -o [prefix]"
277          << endl;
278     cout << " to obtain predicted values: " << endl;
279     cout << "         gemma -bfile [prefix] -epm [filename] -emu [filename] "
280             "-ebv [filename] -k [filename] -predict [num] -o [prefix]"
281          << endl;
282     cout << "         gemma -g [filename] -p [filename] -epm [filename] -emu "
283             "[filename] -ebv [filename] -k [filename] -predict [num] -o "
284             "[prefix]"
285          << endl;
286     cout << " to calculate correlations between SNPs: " << endl;
287     cout << "         gemma -bfile [prefix] -calccor -o [prefix]" << endl;
288     cout << "         gemma -g [filename] -p [filename] -calccor -o [prefix]"
289          << endl;
290     cout << endl;
291   }
292 
293   if (option == 2) {
294     cout << " FILE I/O RELATED OPTIONS" << endl;
295     cout << " -bfile    [prefix]       "
296          << " specify input PLINK binary ped file prefix." << endl;
297     cout << "          requires: *.fam, *.bim and *.bed files" << endl;
298     cout << "          missing value: -9" << endl;
299     cout << " -g        [filename]     "
300          << " specify input BIMBAM mean genotype file name" << endl;
301     cout << "          format: rs#1, allele0, allele1, genotype for individual "
302             "1, genotype for individual 2, ..."
303          << endl;
304     cout << "                  rs#2, allele0, allele1, genotype for individual "
305             "1, genotype for individual 2, ..."
306          << endl;
307     cout << "                  ..." << endl;
308     cout << "          missing value: NA" << endl;
309     cout << " -p        [filename]     "
310          << " specify input BIMBAM-style phenotype file name (when used with PLINK .fam phenotypes are ignored)" << endl;
311     cout << "          format: phenotype for individual 1" << endl;
312     cout << "                  phenotype for individual 2" << endl;
313     cout << "                  ..." << endl;
314     cout << "          missing value: NA" << endl;
315     cout << " -a        [filename]     "
316          << " specify input BIMBAM SNP annotation file name (optional)" << endl;
317     cout << "          format: rs#1, base_position, chr_number" << endl;
318     cout << "                  rs#2, base_position, chr_number" << endl;
319     cout << "                  ..." << endl;
320 
321     cout << " -gxe      [filename]     "
322          << " specify input file that contains a column of environmental "
323             "factor for g by e tests"
324          << endl;
325     cout << "          format: variable for individual 1" << endl;
326     cout << "                  variable for individual 2" << endl;
327     cout << "                  ..." << endl;
328     cout << "          missing value: NA" << endl;
329     cout << " -widv   [filename]     "
330          << " weight file contains a column of positive values to be used "
331          << "as weights for residuals---each weight corresponds to an "
332          << "individual, in which a high weight corresponds to high "
333          << "residual error variance for this individual (similar in "
334 	 << "format to phenotype file)"
335          << endl;
336     cout << "          format: variable for individual 1" << endl;
337     cout << "                  variable for individual 2" << endl;
338     cout << "                  ..." << endl;
339     cout << "          missing value: NA" << endl;
340     cout << " -k        [filename]     "
341          << " specify input kinship/relatedness matrix file name" << endl;
342     cout << " -mk       [filename]     "
343          << " specify input file which contains a list of kinship/relatedness "
344             "matrices"
345          << endl;
346     cout << " -u        [filename]     "
347          << " specify input file containing the eigen vectors of the "
348             "kinship/relatedness matrix"
349          << endl;
350     cout << " -d        [filename]     "
351          << " specify input file containing the eigen values of the "
352             "kinship/relatedness matrix"
353          << endl;
354     cout << " -c        [filename]     "
355          << " specify input covariates file name (optional)" << endl;
356     cout << " -cat      [filename]     "
357          << " specify input category file name (optional), which contains rs "
358             "cat1 cat2 ..."
359          << endl;
360     cout << " -beta     [filename]     "
361          << " specify input beta file name (optional), which contains rs beta "
362             "se_beta n_total (or n_mis and n_obs) estimates from a lm model"
363          << endl;
364     cout << " -cor      [filename]     "
365          << " specify input correlation file name (optional), which contains "
366             "rs window_size correlations from snps"
367          << endl;
368     cout << "          missing value: NA" << endl;
369     cout << "          note: the intercept (a column of 1s) may need to be "
370             "included"
371          << endl;
372     cout << " -epm      [filename]     "
373          << " specify input estimated parameter file name" << endl;
374     cout << " -en [n1] [n2] [n3] [n4]  "
375          << " specify values for the input estimated parameter file (with a "
376             "header)"
377          << endl;
378     cout << "          options: n1: rs column number" << endl;
379     cout << "                   n2: estimated alpha column number (0 to ignore)"
380          << endl;
381     cout << "                   n3: estimated beta column number (0 to ignore)"
382          << endl;
383     cout << "                   n4: estimated gamma column number (0 to ignore)"
384          << endl;
385     cout << "          default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if "
386             "-ebv is specified"
387          << endl;
388     cout << " -ebv      [filename]     "
389          << " specify input estimated random effect (breeding value) file name"
390          << endl;
391     cout << "          format: value for individual 1" << endl;
392     cout << "                  value for individual 2" << endl;
393     cout << "                  ..." << endl;
394     cout << "          missing value: NA" << endl;
395     cout << " -emu      [filename]     "
396          << " specify input log file name containing estimated mean" << endl;
397     cout << " -mu       [num]          "
398          << " specify input estimated mean value" << endl;
399     cout << " -gene     [filename]     "
400          << " specify input gene expression file name" << endl;
401     cout << "          format: header" << endl;
402     cout << "                  gene1, count for individual 1, count for "
403             "individual 2, ..."
404          << endl;
405     cout << "                  gene2, count for individual 1, count for "
406             "individual 2, ..."
407          << endl;
408     cout << "                  ..." << endl;
409     cout << "          missing value: not allowed" << endl;
410     cout << " -r        [filename]     "
411          << " specify input total read count file name" << endl;
412     cout << "          format: total read count for individual 1" << endl;
413     cout << "                  total read count for individual 2" << endl;
414     cout << "                  ..." << endl;
415     cout << "          missing value: NA" << endl;
416     cout
417         << " -snps     [filename]     "
418         << " specify input snps file name to only analyze a certain set of snps"
419         << endl;
420     cout << "          format: rs#1" << endl;
421     cout << "                  rs#2" << endl;
422     cout << "                  ..." << endl;
423     cout << "          missing value: NA" << endl;
424     cout << " -silence                 "
425          << " silent terminal display" << endl;
426     cout << " -km       [num]          "
427          << " specify input kinship/relatedness file type (default 1)." << endl;
428     cout << "          options: 1: \"n by n matrix\" format" << endl;
429     cout << "                   2: \"id  id  value\" format" << endl;
430     cout << " -n        [num]          "
431          << " specify phenotype column in the phenotype/*.fam file (optional; "
432             "default 1)"
433          << endl;
434     cout << " -pace     [num]          "
435          << " specify terminal display update pace (default 1,000 SNPs or "
436             "1,000 iterations)."
437          << endl;
438     cout << " -outdir   [path]         "
439          << " specify output directory path (default \"./output/\")" << endl;
440     cout << " -o        [prefix]       "
441          << " specify output file prefix (default \"result\")" << endl;
442     cout << "          output: prefix.cXX.txt or prefix.sXX.txt from "
443             "kinship/relatedness matrix estimation"
444          << endl;
445     cout << "          output: prefix.assoc.txt and prefix.log.txt form "
446             "association tests"
447          << endl;
448     cout << endl;
449   }
450 
451   if (option == 3) {
452     cout << " SNP QC OPTIONS" << endl;
453     cout << " -miss     [num]          "
454          << " specify missingness threshold (default 0.05)" << endl;
455     cout << " -maf      [num]          "
456          << " specify minor allele frequency threshold (default 0.01)" << endl;
457     cout << " -hwe      [num]          "
458          << " specify HWE test p value threshold (default 0; no test)" << endl;
459     cout << " -r2       [num]          "
460          << " specify r-squared threshold (default 0.9999)" << endl;
461     cout << " -notsnp                  "
462          << " minor allele frequency cutoff is not used" << endl;
463     cout << endl;
464   }
465 
466   if (option == 4) {
467     cout << " RELATEDNESS MATRIX (K) CALCULATION OPTIONS" << endl;
468     cout << " -ksnps    [filename]     "
469          << " specify input snps file name to compute K" << endl;
470     cout << " -loco     [chr]          "
471          << " leave one chromosome out (LOCO) by name (requires -a annotation "
472             "file)"
473          << endl;
474     cout << " -a        [filename]     "
475          << " specify input BIMBAM SNP annotation file name (LOCO only)"
476          << endl;
477     cout << " -gk       [num]          "
478          << " specify which type of kinship/relatedness matrix to generate "
479             "(default 1)"
480          << endl;
481     cout << "          options: 1: centered XX^T/p" << endl;
482     cout << "                   2: standardized XX^T/p" << endl;
483     cout << "          note: non-polymorphic SNPs are excluded " << endl;
484     cout << endl;
485   }
486 
487   if (option == 5) {
488     cout << " EIGEN-DECOMPOSITION OPTIONS" << endl;
489     cout << " -eigen                   "
490          << " specify to perform eigen decomposition of the loaded relatedness "
491             "matrix"
492          << endl;
493     cout << endl;
494   }
495 
496   if (option == 6) {
497     cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
498     cout << " -vc                      "
499          << " specify to perform variance component estimation for the loaded "
500             "relatedness matrix/matrices"
501          << endl;
502     cout
503         << "          options (with kinship file):   1: HE regression (default)"
504         << endl;
505     cout << "                                         2: REML" << endl;
506     cout << "          options (with beta/cor files): 1: Centered genotypes "
507             "(default)"
508          << endl;
509     cout << "                                         2: Standardized genotypes"
510          << endl;
511     cout << "                                         -crt -windowbp [num]"
512          << " specify the window size based on bp (default 1000000; 1Mb)"
513          << endl;
514     cout << "                                         -crt -windowcm [num]"
515          << " specify the window size based on cm (default 0)" << endl;
516     cout << "                                         -crt -windowns [num]"
517          << " specify the window size based on number of snps (default 0)"
518          << endl;
519     cout << endl;
520   }
521 
522   if (option == 7) {
523     cout << " LINEAR MODEL OPTIONS" << endl;
524     cout << " -lm       [num]         "
525          << " specify analysis options (default 1)." << endl;
526     cout << "          options: 1: Wald test" << endl;
527     cout << "                   2: Likelihood ratio test" << endl;
528     cout << "                   3: Score test" << endl;
529     cout << "                   4: 1-3" << endl;
530     cout << endl;
531   }
532 
533   if (option == 8) {
534     cout << " LINEAR MIXED MODEL OPTIONS" << endl;
535     cout << " -lmm      [num]         "
536          << " specify analysis options (default 1)." << endl;
537     cout << "          options: 1: Wald test" << endl;
538     cout << "                   2: Likelihood ratio test" << endl;
539     cout << "                   3: Score test" << endl;
540     cout << "                   4: 1-3" << endl;
541     cout << "                   5: Parameter estimation in the null model only"
542          << endl;
543     cout << " -lmin     [num]          "
544          << " specify minimal value for lambda (default 1e-5)" << endl;
545     cout << " -lmax     [num]          "
546          << " specify maximum value for lambda (default 1e+5)" << endl;
547     cout
548          << " -region   [num]          "
549          << " specify the number of regions used to evaluate lambda (default 10)"
550          << endl;
551     cout << " -loco     [chr]          "
552          << " leave one chromosome out (LOCO) by name (requires -a annotation "
553             "file)"
554          << endl;
555     cout << endl;
556   }
557 
558   if (option == 9) {
559     cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
560     cout << " -n [pheno cols...] - range of phenotypes" << endl;
561     cout << " -pnr				     "
562          << " specify the pvalue threshold to use the Newton-Raphson's method "
563             "(default 0.001)"
564          << endl;
565     cout << " -emi				     "
566          << " specify the maximum number of iterations for the PX-EM method in "
567             "the null (default 10000)"
568          << endl;
569     cout << " -nri				     "
570          << " specify the maximum number of iterations for the "
571             "Newton-Raphson's method in the null (default 100)"
572          << endl;
573     cout << " -emp				     "
574          << " specify the precision for the PX-EM method in the null (default "
575             "0.0001)"
576          << endl;
577     cout << " -nrp				     "
578          << " specify the precision for the Newton-Raphson's method in the "
579             "null (default 0.0001)"
580          << endl;
581     cout << " -crt				     "
582          << " specify to output corrected pvalues for these pvalues that are "
583             "below the -pnr threshold"
584          << endl;
585     cout << endl;
586   }
587 
588   if (option == 10) {
589     cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl;
590     cout << " -bslmm	  [num]			 "
591          << " specify analysis options (default 1)." << endl;
592     cout << "          options: 1: BSLMM" << endl;
593     cout << "                   2: standard ridge regression/GBLUP (no mcmc)"
594          << endl;
595     cout << "                   3: probit BSLMM (requires 0/1 phenotypes)"
596          << endl;
597     cout
598         << "                   4: BSLMM with DAP for Hyper Parameter Estimation"
599         << endl;
600     cout << "                   5: BSLMM with DAP for Fine Mapping" << endl;
601 
602     cout << " -ldr	  [num]			 "
603          << " specify analysis options (default 1)." << endl;
604     cout << "          options: 1: LDR" << endl;
605 
606     cout << "   MCMC OPTIONS" << endl;
607     cout << "   Prior" << endl;
608     cout << " -hmin     [num]          "
609          << " specify minimum value for h (default 0)" << endl;
610     cout << " -hmax     [num]          "
611          << " specify maximum value for h (default 1)" << endl;
612     cout << " -rmin     [num]          "
613          << " specify minimum value for rho (default 0)" << endl;
614     cout << " -rmax     [num]          "
615          << " specify maximum value for rho (default 1)" << endl;
616     cout << " -pmin     [num]          "
617          << " specify minimum value for log10(pi) (default log10(1/p), where p "
618             "is the number of analyzed SNPs )"
619          << endl;
620     cout << " -pmax     [num]          "
621          << " specify maximum value for log10(pi) (default log10(1) )" << endl;
622     cout << " -smin     [num]          "
623          << " specify minimum value for |gamma| (default 0)" << endl;
624     cout << " -smax     [num]          "
625          << " specify maximum value for |gamma| (default 300)" << endl;
626 
627     cout << "   Proposal" << endl;
628     cout << " -gmean    [num]          "
629          << " specify the mean for the geometric distribution (default: 2000)"
630          << endl;
631     cout << " -hscale   [num]          "
632          << " specify the step size scale for the proposal distribution of h "
633             "(value between 0 and 1, default min(10/sqrt(n),1) )"
634          << endl;
635     cout << " -rscale   [num]          "
636          << " specify the step size scale for the proposal distribution of rho "
637             "(value between 0 and 1, default min(10/sqrt(n),1) )"
638          << endl;
639     cout << " -pscale   [num]          "
640          << " specify the step size scale for the proposal distribution of "
641             "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )"
642          << endl;
643 
644     cout << "   Others" << endl;
645     cout << " -w        [num]          "
646          << " specify burn-in steps (default 100,000)" << endl;
647     cout << " -s        [num]          "
648          << " specify sampling steps (default 1,000,000)" << endl;
649     cout << " -rpace    [num]          "
650          << " specify recording pace, record one state in every [num] steps "
651             "(default 10)"
652          << endl;
653     cout << " -wpace    [num]          "
654          << " specify writing pace, write values down in every [num] recorded "
655             "steps (default 1000)"
656          << endl;
657     cout << " -seed     [num]          "
658          << " specify random seed (a random seed is generated by default)"
659          << endl;
660     cout << " -mh       [num]          "
661          << " specify number of MH steps in each iteration (default 10)"
662          << endl;
663     cout << "          requires: 0/1 phenotypes and -bslmm 3 option" << endl;
664     cout << endl;
665   }
666 
667   if (option == 11) {
668     cout << " PREDICTION OPTIONS" << endl;
669     cout << " -predict  [num]			 "
670          << " specify prediction options (default 1)." << endl;
671     cout << "          options: 1: predict for individuals with missing "
672             "phenotypes"
673          << endl;
674     cout << "                   2: predict for individuals with missing "
675             "phenotypes, and convert the predicted values to probability "
676             "scale. Use only for files fitted with -bslmm 3 option"
677          << endl;
678     cout << endl;
679   }
680 
681   if (option == 12) {
682     cout << " CALC CORRELATION OPTIONS" << endl;
683     cout << " -calccor       			 " << endl;
684     cout << " -windowbp       [num]            "
685          << " specify the window size based on bp (default 1000000; 1Mb)"
686          << endl;
687     cout << " -windowcm       [num]            "
688          << " specify the window size based on cm (default 0; not used)"
689          << endl;
690     cout << " -windowns       [num]            "
691          << " specify the window size based on number of snps (default 0; not "
692             "used)"
693          << endl;
694     cout << endl;
695   }
696 
697   if (option == 13) {
698     cout << " NOTE" << endl;
699     cout << " 1. Only individuals with non-missing phenotoypes and covariates "
700             "will be analyzed."
701          << endl;
702     cout << " 2. Missing genotoypes will be repalced with the mean genotype of "
703             "that SNP."
704          << endl;
705     cout << " 3. For lmm analysis, memory should be large enough to hold the "
706             "relatedness matrix and to perform eigen decomposition."
707          << endl;
708     cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp "
709             "will increase computation time dramatically."
710          << endl;
711     cout << " 5. For bslmm analysis, in addition to 3, memory should be large "
712             "enough to hold the whole genotype matrix."
713          << endl;
714     cout << endl;
715   }
716 
717   if (option == 14) {
718     cout << " DEBUG OPTIONS" << endl;
719     cout << " -check                   enable checks (slower)" << endl;
720     cout << " -no-fpe-check            disable hardware floating point checking" << endl;
721     cout << " -strict                  strict mode will stop when there is a problem" << endl;
722     cout << " -silence                 silent terminal display" << endl;
723     cout << " -debug                   debug output" << endl;
724     cout << " -debug-data              debug data output" << endl;
725     cout << " -nind       [num]        read up to num individuals" << endl;
726     cout << " -issue      [num]        enable tests relevant to issue tracker" << endl;
727     cout << " -legacy                  run gemma in legacy mode" << endl;
728     cout << endl;
729   }
730 
731   cout << "The GEMMA software is distributed under the GNU General Public v3" << endl;
732   cout << "   -license    show license information" << endl;
733   cout <<
734     "   see also http://www.xzlab.org/software.html, https://github.com/genetics-statistics" << endl;
735   return;
736 }
737 
738 // OPTIONS
739 // -------
740 // gk:      21-22
741 // gs:      25-26
742 // gq:      27-28
743 // eigen:   31-32
744 // lmm:     1-5
745 // bslmm:   11-15
746 // predict: 41-43
747 // lm:      51
748 // vc:      61
749 // ci:      66-67
750 // calccor: 71
751 // gw:      72
752 
Assign(int argc,char ** argv,PARAM & cPar)753 void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
754   string str;
755 
756   for (int i = 1; i < argc; i++) {
757     if (strcmp(argv[i], "-bfile") == 0 || strcmp(argv[i], "--bfile") == 0 ||
758         strcmp(argv[i], "-b") == 0) {
759       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
760         continue;
761       }
762       ++i;
763       str.clear();
764       str.assign(argv[i]);
765       cPar.file_bfile = str;
766     } else if (strcmp(argv[i], "-mbfile") == 0 ||
767                strcmp(argv[i], "--mbfile") == 0 ||
768                strcmp(argv[i], "-mb") == 0) {
769       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
770         continue;
771       }
772       ++i;
773       str.clear();
774       str.assign(argv[i]);
775       cPar.file_mbfile = str;
776     } else if (strcmp(argv[i], "-silence") == 0 || strcmp(argv[i], "--quiet") == 0) {
777       debug_set_quiet_mode(true);
778     } else if (strcmp(argv[i], "-g") == 0) {
779       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
780         continue;
781       }
782       ++i;
783       str.clear();
784       str.assign(argv[i]);
785       cPar.file_geno = str;
786     } else if (strcmp(argv[i], "-mg") == 0) {
787       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
788         continue;
789       }
790       ++i;
791       str.clear();
792       str.assign(argv[i]);
793       cPar.file_mgeno = str;
794     } else if (strcmp(argv[i], "-p") == 0) {
795       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
796         continue;
797       }
798       ++i;
799       str.clear();
800       str.assign(argv[i]);
801       cPar.file_pheno = str;
802     } else if (strcmp(argv[i], "-a") == 0) {
803       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
804         continue;
805       }
806       ++i;
807       str.clear();
808       str.assign(argv[i]);
809       cPar.file_anno = str;
810     } else if (strcmp(argv[i], "-gxe") == 0) {
811       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
812         continue;
813       }
814       ++i;
815       str.clear();
816       str.assign(argv[i]);
817       cPar.file_gxe = str;
818     } else if (strcmp(argv[i], "-widv") == 0) {
819       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
820         continue;
821       }
822       ++i;
823       str.clear();
824       str.assign(argv[i]);
825       cPar.file_weight = str;
826     } else if (strcmp(argv[i], "-wsnp") == 0) {
827       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
828         continue;
829       }
830       ++i;
831       str.clear();
832       str.assign(argv[i]);
833       cPar.file_wsnp = str;
834     } else if (strcmp(argv[i], "-wcat") == 0) {
835       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
836         continue;
837       }
838       ++i;
839       str.clear();
840       str.assign(argv[i]);
841       cPar.file_wcat = str;
842     } else if (strcmp(argv[i], "-k") == 0) {
843       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
844         continue;
845       }
846       ++i;
847       str.clear();
848       str.assign(argv[i]);
849       cPar.file_kin = str;
850     } else if (strcmp(argv[i], "-mk") == 0) {
851       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
852         continue;
853       }
854       ++i;
855       str.clear();
856       str.assign(argv[i]);
857       cPar.file_mk = str;
858     } else if (strcmp(argv[i], "-u") == 0) {
859       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
860         continue;
861       }
862       ++i;
863       str.clear();
864       str.assign(argv[i]);
865       cPar.file_ku = str;
866     } else if (strcmp(argv[i], "-d") == 0) {
867       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
868         continue;
869       }
870       ++i;
871       str.clear();
872       str.assign(argv[i]);
873       cPar.file_kd = str;
874     } else if (strcmp(argv[i], "-c") == 0) {
875       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
876         continue;
877       }
878       ++i;
879       str.clear();
880       str.assign(argv[i]);
881       cPar.file_cvt = str;
882     } else if (strcmp(argv[i], "-cat") == 0) {
883       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
884         continue;
885       }
886       ++i;
887       str.clear();
888       str.assign(argv[i]);
889       cPar.file_cat = str;
890     } else if (strcmp(argv[i], "-mcat") == 0) {
891       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
892         continue;
893       }
894       ++i;
895       str.clear();
896       str.assign(argv[i]);
897       cPar.file_mcat = str;
898     } else if (strcmp(argv[i], "-catc") == 0) {
899       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
900         continue;
901       }
902       ++i;
903       str.clear();
904       str.assign(argv[i]);
905       cPar.file_catc = str;
906     } else if (strcmp(argv[i], "-mcatc") == 0) {
907       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
908         continue;
909       }
910       ++i;
911       str.clear();
912       str.assign(argv[i]);
913       cPar.file_mcatc = str;
914     } else if (strcmp(argv[i], "-beta") == 0) {
915       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
916         continue;
917       }
918       ++i;
919       str.clear();
920       str.assign(argv[i]);
921       cPar.file_beta = str;
922     } else if (strcmp(argv[i], "-bf") == 0) {
923       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
924         continue;
925       }
926       ++i;
927       str.clear();
928       str.assign(argv[i]);
929       cPar.file_bf = str;
930     } else if (strcmp(argv[i], "-hyp") == 0) {
931       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
932         continue;
933       }
934       ++i;
935       str.clear();
936       str.assign(argv[i]);
937       cPar.file_hyp = str;
938     } else if (strcmp(argv[i], "-cor") == 0) {
939       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
940         continue;
941       }
942       ++i;
943       str.clear();
944       str.assign(argv[i]);
945       cPar.file_cor = str;
946     } else if (strcmp(argv[i], "-study") == 0) {
947       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
948         continue;
949       }
950       ++i;
951       str.clear();
952       str.assign(argv[i]);
953       cPar.file_study = str;
954     } else if (strcmp(argv[i], "-ref") == 0) {
955       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
956         continue;
957       }
958       ++i;
959       str.clear();
960       str.assign(argv[i]);
961       cPar.file_ref = str;
962     } else if (strcmp(argv[i], "-mstudy") == 0) {
963       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
964         continue;
965       }
966       ++i;
967       str.clear();
968       str.assign(argv[i]);
969       cPar.file_mstudy = str;
970     } else if (strcmp(argv[i], "-mref") == 0) {
971       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
972         continue;
973       }
974       ++i;
975       str.clear();
976       str.assign(argv[i]);
977       cPar.file_mref = str;
978     } else if (strcmp(argv[i], "-epm") == 0) {
979       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
980         continue;
981       }
982       ++i;
983       str.clear();
984       str.assign(argv[i]);
985       cPar.file_epm = str;
986     } else if (strcmp(argv[i], "-en") == 0) {
987       while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
988         ++i;
989         str.clear();
990         str.assign(argv[i]);
991         cPar.est_column.push_back(atoi(str.c_str()));
992       }
993     } else if (strcmp(argv[i], "-ebv") == 0) {
994       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
995         continue;
996       }
997       ++i;
998       str.clear();
999       str.assign(argv[i]);
1000       cPar.file_ebv = str;
1001     } else if (strcmp(argv[i], "-emu") == 0) {
1002       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1003         continue;
1004       }
1005       ++i;
1006       str.clear();
1007       str.assign(argv[i]);
1008       cPar.file_log = str;
1009     } else if (strcmp(argv[i], "-mu") == 0) {
1010       if (argv[i + 1] == NULL) {
1011         continue;
1012       }
1013       ++i;
1014       str.clear();
1015       str.assign(argv[i]);
1016       cPar.pheno_mean = atof(str.c_str());
1017     } else if (strcmp(argv[i], "-gene") == 0) {
1018       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1019         continue;
1020       }
1021       ++i;
1022       str.clear();
1023       str.assign(argv[i]);
1024       cPar.file_gene = str;
1025     } else if (strcmp(argv[i], "-r") == 0) {
1026       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1027         continue;
1028       }
1029       ++i;
1030       str.clear();
1031       str.assign(argv[i]);
1032       cPar.file_read = str;
1033     } else if (strcmp(argv[i], "-snps") == 0) {
1034       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1035         continue;
1036       }
1037       ++i;
1038       str.clear();
1039       str.assign(argv[i]);
1040       cPar.file_snps = str;
1041     } else if (strcmp(argv[i], "-km") == 0) {
1042       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1043         continue;
1044       }
1045       ++i;
1046       str.clear();
1047       str.assign(argv[i]);
1048       cPar.k_mode = atoi(str.c_str());
1049     } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (list/range)
1050       (cPar.p_column).clear();
1051       while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
1052         ++i;
1053         str.clear();
1054         str.assign(argv[i]);
1055         (cPar.p_column).push_back(atoi(str.c_str()));
1056       }
1057     } else if (strcmp(argv[i], "-pace") == 0) {
1058       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1059         continue;
1060       }
1061       ++i;
1062       str.clear();
1063       str.assign(argv[i]);
1064       cPar.d_pace = atoi(str.c_str());
1065     } else if (strcmp(argv[i], "-outdir") == 0) {
1066       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1067         continue;
1068       }
1069       ++i;
1070       str.clear();
1071       str.assign(argv[i]);
1072       cPar.path_out = str;
1073     } else if (strcmp(argv[i], "-o") == 0) {
1074       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1075         continue;
1076       }
1077       ++i;
1078       str.clear();
1079       str.assign(argv[i]);
1080       cPar.file_out = str;
1081     } else if (strcmp(argv[i], "-miss") == 0) {
1082       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1083         continue;
1084       }
1085       ++i;
1086       str.clear();
1087       str.assign(argv[i]);
1088       cPar.miss_level = atof(str.c_str());
1089     } else if (strcmp(argv[i], "-maf") == 0) {
1090       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1091         continue;
1092       }
1093       ++i;
1094       str.clear();
1095       str.assign(argv[i]);
1096       if (cPar.maf_level != -1) {
1097         cPar.maf_level = atof(str.c_str());
1098       }
1099     } else if (strcmp(argv[i], "-hwe") == 0) {
1100       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1101         continue;
1102       }
1103       ++i;
1104       str.clear();
1105       str.assign(argv[i]);
1106       cPar.hwe_level = atof(str.c_str());
1107     } else if (strcmp(argv[i], "-r2") == 0) {
1108       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1109         continue;
1110       }
1111       ++i;
1112       str.clear();
1113       str.assign(argv[i]);
1114       cPar.r2_level = atof(str.c_str());
1115     } else if (strcmp(argv[i], "-notsnp") == 0) {
1116       cPar.maf_level = -1;
1117     } else if (strcmp(argv[i], "-loco") == 0) {
1118       assert(argv[i + 1]);
1119       ++i;
1120       str.clear();
1121       str.assign(argv[i]);
1122       cPar.loco = str;
1123     } else if (strcmp(argv[i], "-gk") == 0) {
1124       if (cPar.a_mode != 0) {
1125         cPar.error = true;
1126         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1127                 "-predict -calccor options is allowed."
1128              << endl;
1129         break;
1130       }
1131       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1132         cPar.a_mode = M_KIN; // default
1133         continue;
1134       }
1135       ++i;
1136       str.clear();
1137       str.assign(argv[i]);
1138       cPar.a_mode = 20 + atoi(str.c_str());
1139     } else if (strcmp(argv[i], "-gs") == 0) {
1140       if (cPar.a_mode != 0) {
1141         cPar.error = true;
1142         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1143                 "-predict -calccor options is allowed."
1144              << endl;
1145         break;
1146       }
1147       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1148         cPar.a_mode = 25;
1149         continue;
1150       }
1151       ++i;
1152       str.clear();
1153       str.assign(argv[i]);
1154       cPar.a_mode = 24 + atoi(str.c_str());
1155     } else if (strcmp(argv[i], "-gq") == 0) {
1156       if (cPar.a_mode != 0) {
1157         cPar.error = true;
1158         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1159                 "-predict -calccor options is allowed."
1160              << endl;
1161         break;
1162       }
1163       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1164         cPar.a_mode = 27;
1165         continue;
1166       }
1167       ++i;
1168       str.clear();
1169       str.assign(argv[i]);
1170       cPar.a_mode = 26 + atoi(str.c_str());
1171     } else if (strcmp(argv[i], "-gw") == 0) {
1172       if (cPar.a_mode != 0) {
1173         cPar.error = true;
1174         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1175                 "-predict -calccor options is allowed."
1176              << endl;
1177         break;
1178       }
1179       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1180         cPar.a_mode = 72;
1181         continue;
1182       }
1183       ++i;
1184       str.clear();
1185       str.assign(argv[i]);
1186       cPar.a_mode = 71 + atoi(str.c_str());
1187     } else if (strcmp(argv[i], "-sample") == 0) {
1188       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1189         continue;
1190       }
1191       ++i;
1192       str.clear();
1193       str.assign(argv[i]);
1194       cPar.ni_subsample = atoi(str.c_str());
1195     } else if (strcmp(argv[i], "-eigen") == 0) {
1196       if (cPar.a_mode != 0) {
1197         cPar.error = true;
1198         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1199                 "-predict -calccor options is allowed."
1200              << endl;
1201         break;
1202       }
1203       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1204         cPar.a_mode = 31;
1205         continue;
1206       }
1207       ++i;
1208       str.clear();
1209       str.assign(argv[i]);
1210       cPar.a_mode = 30 + atoi(str.c_str());
1211     } else if (strcmp(argv[i], "-calccor") == 0) {
1212       if (cPar.a_mode != 0) {
1213         cPar.error = true;
1214         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1215                 "-predict -calccor options is allowed."
1216              << endl;
1217         break;
1218       }
1219       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1220         cPar.a_mode = 71;
1221         continue;
1222       }
1223       ++i;
1224       str.clear();
1225       str.assign(argv[i]);
1226       cPar.a_mode = 70 + atoi(str.c_str());
1227     } else if (strcmp(argv[i], "-vc") == 0) {
1228       if (cPar.a_mode != 0) {
1229         cPar.error = true;
1230         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1231                 "-predict -calccor options is allowed."
1232              << endl;
1233         break;
1234       }
1235       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1236         cPar.a_mode = 61;
1237         continue;
1238       }
1239       ++i;
1240       str.clear();
1241       str.assign(argv[i]);
1242       cPar.a_mode = 60 + atoi(str.c_str());
1243     } else if (strcmp(argv[i], "-ci") == 0) {
1244       if (cPar.a_mode != 0) {
1245         cPar.error = true;
1246         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1247                 "-predict -calccor options is allowed."
1248              << endl;
1249         break;
1250       }
1251       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1252         cPar.a_mode = 66;
1253         continue;
1254       }
1255       ++i;
1256       str.clear();
1257       str.assign(argv[i]);
1258       cPar.a_mode = 65 + atoi(str.c_str());
1259     } else if (strcmp(argv[i], "-pve") == 0) {
1260       double s = 0;
1261       while (argv[i + 1] != NULL &&
1262              (argv[i + 1][0] != '-' || !isalpha(argv[i + 1][1]))) {
1263         ++i;
1264         str.clear();
1265         str.assign(argv[i]);
1266         cPar.v_pve.push_back(atof(str.c_str()));
1267         s += atof(str.c_str());
1268       }
1269       if (s == 1) {
1270         cout << "summation of pve equals one." << endl;
1271       }
1272     } else if (strcmp(argv[i], "-blocks") == 0) {
1273       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1274         continue;
1275       }
1276       ++i;
1277       str.clear();
1278       str.assign(argv[i]);
1279       cPar.n_block = atoi(str.c_str());
1280     } else if (strcmp(argv[i], "-noconstrain") == 0) {
1281       cPar.noconstrain = true;
1282     } else if (strcmp(argv[i], "-lm") == 0) {
1283       if (cPar.a_mode != 0) {
1284         cPar.error = true;
1285         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1286                 "-predict -calccor options is allowed."
1287              << endl;
1288         break;
1289       }
1290       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1291         cPar.a_mode = 51;
1292         continue;
1293       }
1294       ++i;
1295       str.clear();
1296       str.assign(argv[i]);
1297       cPar.a_mode = 50 + atoi(str.c_str());
1298     } else if (strcmp(argv[i], "-fa") == 0 || strcmp(argv[i], "-lmm") == 0) {
1299       if (cPar.a_mode != 0) {
1300         cPar.error = true;
1301         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1302                 "-predict -calccor options is allowed."
1303              << endl;
1304         break;
1305       }
1306       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1307         cPar.a_mode = 1;
1308         continue;
1309       }
1310       ++i;
1311       str.clear();
1312       str.assign(argv[i]);
1313       cPar.a_mode = atoi(str.c_str());
1314     } else if (strcmp(argv[i], "-lmin") == 0) {
1315       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1316         continue;
1317       }
1318       ++i;
1319       str.clear();
1320       str.assign(argv[i]);
1321       cPar.l_min = atof(str.c_str());
1322     } else if (strcmp(argv[i], "-lmax") == 0) {
1323       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1324         continue;
1325       }
1326       ++i;
1327       str.clear();
1328       str.assign(argv[i]);
1329       cPar.l_max = atof(str.c_str());
1330     } else if (strcmp(argv[i], "-region") == 0) {
1331       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1332         continue;
1333       }
1334       ++i;
1335       str.clear();
1336       str.assign(argv[i]);
1337       cPar.n_region = atoi(str.c_str());
1338     } else if (strcmp(argv[i], "-pnr") == 0) {
1339       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1340         continue;
1341       }
1342       ++i;
1343       str.clear();
1344       str.assign(argv[i]);
1345       cPar.p_nr = atof(str.c_str());
1346     } else if (strcmp(argv[i], "-emi") == 0) {
1347       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1348         continue;
1349       }
1350       ++i;
1351       str.clear();
1352       str.assign(argv[i]);
1353       cPar.em_iter = atoi(str.c_str());
1354     } else if (strcmp(argv[i], "-nri") == 0) {
1355       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1356         continue;
1357       }
1358       ++i;
1359       str.clear();
1360       str.assign(argv[i]);
1361       cPar.nr_iter = atoi(str.c_str());
1362     } else if (strcmp(argv[i], "-nind") == 0) {
1363       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1364         continue;
1365       }
1366       ++i;
1367       str.clear();
1368       str.assign(argv[i]);
1369       cPar.ni_max = atoi(str.c_str()); // for testing purposes
1370       enforce(cPar.ni_max > 0);
1371     } else if (strcmp(argv[i], "-issue") == 0) {
1372       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1373         continue;
1374       }
1375       ++i;
1376       str.clear();
1377       str.assign(argv[i]);
1378       auto issue = atoi(str.c_str()); // for testing purposes
1379       enforce(issue > 0);
1380       debug_set_issue(issue);
1381     } else if (strcmp(argv[i], "-emp") == 0) {
1382       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1383         continue;
1384       }
1385       ++i;
1386       str.clear();
1387       str.assign(argv[i]);
1388       cPar.em_prec = atof(str.c_str());
1389     } else if (strcmp(argv[i], "-nrp") == 0) {
1390       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1391         continue;
1392       }
1393       ++i;
1394       str.clear();
1395       str.assign(argv[i]);
1396       cPar.nr_prec = atof(str.c_str());
1397     } else if (strcmp(argv[i], "-crt") == 0) {
1398       cPar.crt = 1;
1399     } else if (strcmp(argv[i], "-bslmm") == 0) {
1400       if (cPar.a_mode != 0) {
1401         cPar.error = true;
1402         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1403                 "-predict -calccor options is allowed."
1404              << endl;
1405         break;
1406       }
1407       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1408         cPar.a_mode = 11;
1409         continue;
1410       }
1411       ++i;
1412       str.clear();
1413       str.assign(argv[i]);
1414       cPar.a_mode = 10 + atoi(str.c_str());
1415     } else if (strcmp(argv[i], "-hmin") == 0) {
1416       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1417         continue;
1418       }
1419       ++i;
1420       str.clear();
1421       str.assign(argv[i]);
1422       cPar.h_min = atof(str.c_str());
1423     } else if (strcmp(argv[i], "-hmax") == 0) {
1424       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1425         continue;
1426       }
1427       ++i;
1428       str.clear();
1429       str.assign(argv[i]);
1430       cPar.h_max = atof(str.c_str());
1431     } else if (strcmp(argv[i], "-rmin") == 0) {
1432       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1433         continue;
1434       }
1435       ++i;
1436       str.clear();
1437       str.assign(argv[i]);
1438       cPar.rho_min = atof(str.c_str());
1439     } else if (strcmp(argv[i], "-rmax") == 0) {
1440       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1441         continue;
1442       }
1443       ++i;
1444       str.clear();
1445       str.assign(argv[i]);
1446       cPar.rho_max = atof(str.c_str());
1447     } else if (strcmp(argv[i], "-pmin") == 0) {
1448       if (argv[i + 1] == NULL) {
1449         continue;
1450       }
1451       ++i;
1452       str.clear();
1453       str.assign(argv[i]);
1454       cPar.logp_min = atof(str.c_str()) * log(10.0);
1455     } else if (strcmp(argv[i], "-pmax") == 0) {
1456       if (argv[i + 1] == NULL) {
1457         continue;
1458       }
1459       ++i;
1460       str.clear();
1461       str.assign(argv[i]);
1462       cPar.logp_max = atof(str.c_str()) * log(10.0);
1463     } else if (strcmp(argv[i], "-smin") == 0) {
1464       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1465         continue;
1466       }
1467       ++i;
1468       str.clear();
1469       str.assign(argv[i]);
1470       cPar.s_min = atoi(str.c_str());
1471     } else if (strcmp(argv[i], "-smax") == 0) {
1472       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1473         continue;
1474       }
1475       ++i;
1476       str.clear();
1477       str.assign(argv[i]);
1478       cPar.s_max = atoi(str.c_str());
1479     } else if (strcmp(argv[i], "-gmean") == 0) {
1480       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1481         continue;
1482       }
1483       ++i;
1484       str.clear();
1485       str.assign(argv[i]);
1486       cPar.geo_mean = atof(str.c_str());
1487     } else if (strcmp(argv[i], "-hscale") == 0) {
1488       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1489         continue;
1490       }
1491       ++i;
1492       str.clear();
1493       str.assign(argv[i]);
1494       cPar.h_scale = atof(str.c_str());
1495     } else if (strcmp(argv[i], "-rscale") == 0) {
1496       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1497         continue;
1498       }
1499       ++i;
1500       str.clear();
1501       str.assign(argv[i]);
1502       cPar.rho_scale = atof(str.c_str());
1503     } else if (strcmp(argv[i], "-pscale") == 0) {
1504       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1505         continue;
1506       }
1507       ++i;
1508       str.clear();
1509       str.assign(argv[i]);
1510       cPar.logp_scale = atof(str.c_str()) * log(10.0);
1511     } else if (strcmp(argv[i], "-w") == 0) {
1512       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1513         continue;
1514       }
1515       ++i;
1516       str.clear();
1517       str.assign(argv[i]);
1518       cPar.w_step = atoi(str.c_str());
1519     } else if (strcmp(argv[i], "-s") == 0) {
1520       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1521         continue;
1522       }
1523       ++i;
1524       str.clear();
1525       str.assign(argv[i]);
1526       cPar.s_step = atoi(str.c_str());
1527     } else if (strcmp(argv[i], "-rpace") == 0) {
1528       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1529         continue;
1530       }
1531       ++i;
1532       str.clear();
1533       str.assign(argv[i]);
1534       cPar.r_pace = atoi(str.c_str());
1535     } else if (strcmp(argv[i], "-wpace") == 0) {
1536       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1537         continue;
1538       }
1539       ++i;
1540       str.clear();
1541       str.assign(argv[i]);
1542       cPar.w_pace = atoi(str.c_str());
1543     } else if (strcmp(argv[i], "-seed") == 0) {
1544       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1545         continue;
1546       }
1547       ++i;
1548       str.clear();
1549       str.assign(argv[i]);
1550       cPar.randseed = atol(str.c_str());
1551     } else if (strcmp(argv[i], "-mh") == 0) {
1552       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1553         continue;
1554       }
1555       ++i;
1556       str.clear();
1557       str.assign(argv[i]);
1558       cPar.n_mh = atoi(str.c_str());
1559     } else if (strcmp(argv[i], "-predict") == 0) {
1560       if (cPar.a_mode != 0) {
1561         cPar.error = true;
1562         cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1563                 "-predict -calccor options is allowed."
1564              << endl;
1565         break;
1566       }
1567       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1568         cPar.a_mode = 41;
1569         continue;
1570       }
1571       ++i;
1572       str.clear();
1573       str.assign(argv[i]);
1574       cPar.a_mode = 40 + atoi(str.c_str());
1575     } else if (strcmp(argv[i], "-windowcm") == 0) {
1576       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1577         continue;
1578       }
1579       ++i;
1580       str.clear();
1581       str.assign(argv[i]);
1582       cPar.window_cm = atof(str.c_str());
1583     } else if (strcmp(argv[i], "-windowbp") == 0) {
1584       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1585         continue;
1586       }
1587       ++i;
1588       str.clear();
1589       str.assign(argv[i]);
1590       cPar.window_bp = atoi(str.c_str());
1591     } else if (strcmp(argv[i], "-windowns") == 0) {
1592       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1593         continue;
1594       }
1595       ++i;
1596       str.clear();
1597       str.assign(argv[i]);
1598       cPar.window_ns = atoi(str.c_str());
1599     } else if (strcmp(argv[i], "-debug-data") == 0) {
1600       // cPar.mode_debug = true;
1601       debug_set_debug_data_mode(true);
1602       debug_set_debug_mode(true);
1603     } else if (strcmp(argv[i], "-debug") == 0) {
1604       // cPar.mode_debug = true;
1605       debug_set_debug_mode(true);
1606     } else if (strcmp(argv[i], "-check") == 0) {
1607       // cPar.mode_check = false;
1608       debug_set_check_mode(true);
1609     } else if (strcmp(argv[i], "-no-check") == 0) {
1610       // cPar.mode_check = false;
1611       debug_set_no_check_mode(true);
1612     } else if (strcmp(argv[i], "-no-fpe-check") == 0) {
1613       // cPar.mode_check = false;
1614       debug_set_no_fpe_check_mode(true);
1615     } else if (strcmp(argv[i], "-strict") == 0) {
1616       // cPar.mode_strict = true;
1617       debug_set_strict_mode(true);
1618       debug_set_debug_mode(true);
1619     } else if (strcmp(argv[i], "-legacy") == 0) {
1620       debug_set_legacy_mode(true);
1621       warning_msg("you are running in legacy mode - support may drop in future versions of gemma");
1622     } else {
1623       cout << "error! unrecognized option: " << argv[i] << endl;
1624       cPar.error = true;
1625       continue;
1626     }
1627   }
1628 
1629   // Change prediction mode to 43 if the epm file is not provided.
1630   if (cPar.a_mode == 41 && cPar.file_epm.empty()) {
1631     cPar.a_mode = 43;
1632   }
1633 
1634   return;
1635 }
1636 
BatchRun(PARAM & cPar)1637 void GEMMA::BatchRun(PARAM &cPar) {
1638   clock_t time_begin, time_start;
1639   time_begin = clock();
1640 
1641   if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
1642 
1643   // Read Files.
1644   cout << "Reading Files ... " << endl;
1645   cPar.ReadFiles();
1646   if (cPar.error == true) {
1647     cout << "error! fail to read files. " << endl;
1648     return;
1649   }
1650   cPar.CheckData();
1651   if (cPar.error == true) {
1652     cout << "error! fail to check data. " << endl;
1653     return;
1654   }
1655 
1656   // Prediction for bslmm
1657   if (cPar.a_mode == 41 || cPar.a_mode == 42) {
1658     gsl_vector *y_prdt;
1659 
1660     y_prdt = gsl_vector_safe_alloc(cPar.ni_total - cPar.ni_test);
1661 
1662     // set to zero
1663     gsl_vector_set_zero(y_prdt);
1664 
1665     PRDT cPRDT;
1666     cPRDT.CopyFromParam(cPar);
1667 
1668     // add breeding value if needed
1669     if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
1670       cout << "Adding Breeding Values ... " << endl;
1671 
1672       gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
1673       gsl_vector *u_hat = gsl_vector_safe_alloc(cPar.ni_test);
1674 
1675       // read kinship matrix and set u_hat
1676       vector<int> indicator_all;
1677       size_t c_bv = 0;
1678       for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
1679         indicator_all.push_back(1);
1680         if (cPar.indicator_bv[i] == 1) {
1681           gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
1682           c_bv++;
1683         }
1684       }
1685 
1686       ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
1687                    cPar.error, G);
1688       if (cPar.error == true) {
1689         cout << "error! fail to read kinship/relatedness file. " << endl;
1690         return;
1691       }
1692 
1693       // read u
1694       cPRDT.AddBV(G, u_hat, y_prdt);
1695 
1696       gsl_matrix_safe_free(G);
1697       gsl_vector_safe_free(u_hat);
1698     }
1699 
1700     // add beta
1701     if (!cPar.file_bfile.empty()) {
1702       cPRDT.AnalyzePlink(y_prdt);
1703     } else {
1704       cPRDT.AnalyzeBimbam(y_prdt);
1705     }
1706 
1707     // add mu
1708     gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
1709 
1710     // convert y to probability if needed
1711     if (cPar.a_mode == 42) {
1712       double d;
1713       for (size_t i = 0; i < y_prdt->size; i++) {
1714         d = gsl_vector_get(y_prdt, i);
1715         d = gsl_cdf_gaussian_P(d, 1.0);
1716         gsl_vector_set(y_prdt, i, d);
1717       }
1718     }
1719 
1720     cPRDT.CopyToParam(cPar);
1721 
1722     cPRDT.WriteFiles(y_prdt);
1723 
1724     gsl_vector_safe_free(y_prdt);
1725   }
1726 
1727   // Prediction with kinship matrix only; for one or more phenotypes
1728   if (cPar.a_mode == 43) {
1729     // first, use individuals with full phenotypes to obtain estimates of Vg and
1730     // Ve
1731     gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
1732     gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
1733     gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
1734     gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
1735     gsl_matrix *UtW = gsl_matrix_safe_alloc(Y->size1, W->size2);
1736     gsl_matrix *UtY = gsl_matrix_safe_alloc(Y->size1, Y->size2);
1737     gsl_vector *eval = gsl_vector_safe_alloc(Y->size1);
1738 
1739     gsl_matrix *Y_full = gsl_matrix_safe_alloc(cPar.ni_cvt, cPar.n_ph);
1740     gsl_matrix *W_full = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_cvt);
1741 
1742     // set covariates matrix W and phenotype matrix Y
1743     // an intercept should be included in W,
1744     cPar.CopyCvtPhen(W, Y, 0);
1745     cPar.CopyCvtPhen(W_full, Y_full, 1);
1746 
1747     gsl_matrix *Y_hat = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_ph);
1748     gsl_matrix *G_full = gsl_matrix_safe_alloc(Y_full->size1, Y_full->size1);
1749     gsl_matrix *H_full = gsl_matrix_safe_alloc(Y_full->size1 * Y_hat->size2,
1750                                           Y_full->size1 * Y_hat->size2);
1751 
1752     // read relatedness matrix G, and matrix G_full
1753     ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
1754                  cPar.error, G);
1755     if (cPar.error == true) {
1756       cout << "error! fail to read kinship/relatedness file. " << endl;
1757       return;
1758     }
1759     // This is not so elegant. Reads twice to select on idv and then cvt
1760     ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
1761                  cPar.error, G_full);
1762     if (cPar.error == true) {
1763       cout << "error! fail to read kinship/relatedness file. " << endl;
1764       return;
1765     }
1766 
1767     // center matrix G
1768     CenterMatrix(G);
1769     CenterMatrix(G_full);
1770     validate_K(G);
1771 
1772     // eigen-decomposition and calculate trace_G
1773     cout << "Start Eigen-Decomposition..." << endl;
1774     time_start = clock();
1775     cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
1776     // write(eval,"eval zeroed");
1777     cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1778 
1779     // calculate UtW and Uty
1780     CalcUtX(U, W, UtW);
1781     CalcUtX(U, Y, UtY);
1782 
1783     // calculate variance component and beta estimates
1784     // and then obtain predicted values
1785     if (cPar.n_ph == 1) {
1786       gsl_vector *beta = gsl_vector_safe_alloc(W->size2);
1787       gsl_vector *se_beta = gsl_vector_safe_alloc(W->size2);
1788 
1789       double lambda, logl, vg, ve;
1790       gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
1791 
1792       // obtain estimates
1793       CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
1794                  cPar.n_region, lambda, logl);
1795       CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
1796                       se_beta);
1797 
1798       cout << "REMLE estimate for vg in the null model = " << vg << endl;
1799       cout << "REMLE estimate for ve in the null model = " << ve << endl;
1800       cPar.vg_remle_null = vg;
1801       cPar.ve_remle_null = ve;
1802 
1803       // obtain Y_hat from fixed effects
1804       gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
1805       gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
1806 
1807       // obtain H
1808       gsl_matrix_set_identity(H_full);
1809       gsl_matrix_scale(H_full, ve);
1810       gsl_matrix_scale(G_full, vg);
1811       gsl_matrix_add(H_full, G_full);
1812 
1813       // free matrices
1814       gsl_vector_safe_free(beta);
1815       gsl_vector_safe_free(se_beta);
1816     } else {
1817       gsl_matrix *Vg = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
1818       gsl_matrix *Ve = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
1819       gsl_matrix *B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
1820       gsl_matrix *se_B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
1821 
1822       // obtain estimates
1823       CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
1824                         cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
1825                         cPar.n_region, Vg, Ve, B, se_B);
1826 
1827       cout << "REMLE estimate for Vg in the null model: " << endl;
1828       for (size_t i = 0; i < Vg->size1; i++) {
1829         for (size_t j = 0; j <= i; j++) {
1830           cout << tab(j) << gsl_matrix_get(Vg, i, j);
1831         }
1832         cout << endl;
1833       }
1834       cout << "REMLE estimate for Ve in the null model: " << endl;
1835       for (size_t i = 0; i < Ve->size1; i++) {
1836         for (size_t j = 0; j <= i; j++) {
1837           cout << tab(j) << gsl_matrix_get(Ve, i, j);
1838         }
1839         cout << endl;
1840       }
1841       cPar.Vg_remle_null.clear();
1842       cPar.Ve_remle_null.clear();
1843       for (size_t i = 0; i < Vg->size1; i++) {
1844         for (size_t j = i; j < Vg->size2; j++) {
1845           cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
1846           cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
1847         }
1848       }
1849 
1850       // obtain Y_hat from fixed effects
1851       gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
1852 
1853       // obtain H
1854       KroneckerSym(G_full, Vg, H_full);
1855       for (size_t i = 0; i < G_full->size1; i++) {
1856         gsl_matrix_view H_sub = gsl_matrix_submatrix(
1857             H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
1858         gsl_matrix_add(&H_sub.matrix, Ve);
1859       }
1860 
1861       // free matrices
1862       gsl_matrix_safe_free(Vg);
1863       gsl_matrix_safe_free(Ve);
1864       gsl_matrix_safe_free(B);
1865       gsl_matrix_safe_free(se_B);
1866     }
1867 
1868     PRDT cPRDT;
1869 
1870     cPRDT.CopyFromParam(cPar);
1871 
1872     cout << "Predicting Missing Phentypes ... " << endl;
1873     time_start = clock();
1874     cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
1875     cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1876 
1877     cPRDT.WriteFiles(Y_full);
1878 
1879     gsl_matrix_safe_free(Y);
1880     gsl_matrix_safe_free(W);
1881     gsl_matrix_safe_free(G);
1882     gsl_matrix_safe_free(U);
1883     gsl_matrix_safe_free(UtW);
1884     gsl_matrix_safe_free(UtY);
1885     gsl_vector_safe_free(eval);
1886 
1887     gsl_matrix_safe_free(Y_full);
1888     gsl_matrix_safe_free(Y_hat);
1889     gsl_matrix_safe_free(W_full);
1890     gsl_matrix_safe_free(G_full);
1891     gsl_matrix_safe_free(H_full);
1892   }
1893 
1894   // Generate Kinship matrix (optionally using LOCO)
1895   if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
1896     cout << "Calculating Relatedness Matrix ... " << endl;
1897 
1898     gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
1899     enforce_msg(G, "allocate G"); // just to be sure
1900 
1901     time_start = clock();
1902 
1903     cPar.CalcKin(G);
1904 
1905     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1906     if (cPar.error == true) {
1907       cout << "error! fail to calculate relatedness matrix. " << endl;
1908       return;
1909     }
1910 
1911     // Now we have the Kinship matrix test it
1912     validate_K(G);
1913 
1914     if (cPar.a_mode == M_KIN) {
1915       cPar.WriteMatrix(G, "cXX");
1916     } else { // M_KIN2
1917       cPar.WriteMatrix(G, "sXX");
1918     }
1919 
1920     gsl_matrix_safe_free(G);
1921   }
1922 
1923   // Compute the LDSC weights (not implemented yet)
1924   if (cPar.a_mode == 72) {
1925     cout << "Calculating Weights ... " << endl;
1926 
1927     VARCOV cVarcov;
1928     cVarcov.CopyFromParam(cPar);
1929 
1930     if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
1931 
1932     if (!cPar.file_bfile.empty()) {
1933       cVarcov.AnalyzePlink();
1934     } else {
1935       cVarcov.AnalyzeBimbam();
1936     }
1937 
1938     cVarcov.CopyToParam(cPar);
1939   }
1940 
1941   // Compute the S matrix (and its variance), that is used for
1942   // variance component estimation using summary statistics.
1943   if (cPar.a_mode == 25 || cPar.a_mode == 26) {
1944     cout << "Calculating the S Matrix ... " << endl;
1945 
1946     gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
1947     gsl_vector *ns = gsl_vector_safe_alloc(cPar.n_vc + 1);
1948     gsl_matrix_set_zero(S);
1949     gsl_vector_set_zero(ns);
1950 
1951     gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
1952     gsl_matrix_view Svar_mat =
1953         gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
1954     gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
1955 
1956     gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
1957     gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
1958     gsl_matrix_set_zero(K);
1959     gsl_matrix_set_zero(A);
1960 
1961     gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
1962     gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
1963 
1964     cPar.CopyCvtPhen(W, y, 0);
1965 
1966     set<string> setSnps_beta;
1967     map<string, double> mapRS2wA, mapRS2wK;
1968 
1969     cPar.ObtainWeight(setSnps_beta, mapRS2wK);
1970 
1971     time_start = clock();
1972     cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
1973                &ns_vec.vector);
1974     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1975     if (cPar.error == true) {
1976       cout << "error! fail to calculate the S matrix. " << endl;
1977       return;
1978     }
1979 
1980     gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
1981 
1982     cPar.WriteMatrix(S, "S");
1983     cPar.WriteVector(ns, "size");
1984     cPar.WriteVar("snps");
1985 
1986     gsl_matrix_safe_free(S);
1987     gsl_vector_safe_free(ns);
1988 
1989     gsl_matrix_safe_free(A);
1990     gsl_matrix_safe_free(K);
1991 
1992     gsl_vector_safe_free(y);
1993     gsl_matrix_safe_free(K);
1994   }
1995 
1996   // Compute the q vector, that is used for variance component estimation using
1997   // summary statistics
1998   if (cPar.a_mode == 27 || cPar.a_mode == 28) {
1999     gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2000     gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2001     gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2002     gsl_vector_set_zero(q);
2003     gsl_vector_set_zero(s);
2004 
2005     gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
2006 
2007     vector<size_t> vec_cat, vec_ni;
2008     vector<double> vec_weight, vec_z2;
2009     map<string, double> mapRS2weight;
2010     mapRS2weight.clear();
2011 
2012     time_start = clock();
2013     ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
2014                   vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
2015                   cPar.ns_test);
2016     cout << "## number of total individuals = " << cPar.ni_total << endl;
2017     cout << "## number of total SNPs/var = " << cPar.ns_total << endl;
2018     cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
2019     cout << "## number of variance components = " << cPar.n_vc << endl;
2020     cout << "Calculating the q vector ... " << endl;
2021     Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2022           &s_vec.vector);
2023     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2024 
2025     if (cPar.error == true) {
2026       cout << "error! fail to calculate the q vector. " << endl;
2027       return;
2028     }
2029 
2030     gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
2031 
2032     cPar.WriteMatrix(Vq, "Vq");
2033     cPar.WriteVector(q, "q");
2034     cPar.WriteVector(s, "size");
2035     gsl_matrix_safe_free(Vq);
2036     gsl_vector_safe_free(q);
2037     gsl_vector_safe_free(s);
2038   }
2039 
2040   // Calculate SNP covariance.
2041   if (cPar.a_mode == 71) {
2042     VARCOV cVarcov;
2043     cVarcov.CopyFromParam(cPar);
2044 
2045     if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
2046 
2047     if (!cPar.file_bfile.empty()) {
2048       cVarcov.AnalyzePlink();
2049     } else {
2050       cVarcov.AnalyzeBimbam();
2051     }
2052 
2053     cVarcov.CopyToParam(cPar);
2054   }
2055 
2056   // LM.
2057   if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
2058       cPar.a_mode == 54) { // Fit LM
2059     gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2060     gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2061 
2062     // set covariates matrix W and phenotype matrix Y
2063     // an intercept should be included in W,
2064     cPar.CopyCvtPhen(W, Y, 0);
2065 
2066     // Fit LM or mvLM
2067     if (cPar.n_ph == 1) {
2068       LM cLm;
2069       cLm.CopyFromParam(cPar);
2070 
2071       gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2072 
2073       // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
2074 
2075       if (!cPar.file_gene.empty()) {
2076         cLm.AnalyzeGene(W,
2077                         &Y_col.vector); // y is the predictor, not the phenotype
2078       } else if (!cPar.file_bfile.empty()) {
2079         cLm.AnalyzePlink(W, &Y_col.vector);
2080       } else {
2081         cLm.AnalyzeBimbam(W, &Y_col.vector);
2082       }
2083 
2084       cLm.WriteFiles();
2085       cLm.CopyToParam(cPar);
2086     }
2087     // release all matrices and vectors
2088     gsl_matrix_safe_free(Y);
2089     gsl_matrix_safe_free(W);
2090   }
2091 
2092   // VC estimation with one or multiple kinship matrices
2093   // REML approach only
2094   // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
2095   // in param.cpp
2096   // for one phenotype only;
2097   if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
2098     if (!cPar.file_beta.empty()) {
2099       // need to obtain a common set of SNPs between beta file and the genotype
2100       // file; these are saved in mapRS2wA and mapRS2wK
2101       // normalize the weight in mapRS2wK to have an average of one; each
2102       // element of mapRS2wA is 1
2103       // update indicator_snps, so that the numbers are in accordance with
2104       // mapRS2wK
2105       set<string> setSnps_beta;
2106       ReadFile_snps_header(cPar.file_beta, setSnps_beta);
2107 
2108       map<string, double> mapRS2wA, mapRS2wK;
2109       cPar.ObtainWeight(setSnps_beta, mapRS2wK);
2110 
2111       cPar.UpdateSNP(mapRS2wK);
2112 
2113       // Setup matrices and vectors.
2114       gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
2115       gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2116       gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2117       gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2118 
2119       gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
2120       gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
2121 
2122       gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2123       gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
2124 
2125       gsl_matrix_set_zero(K);
2126       gsl_matrix_set_zero(A);
2127 
2128       gsl_matrix_set_zero(S);
2129       gsl_matrix_set_zero(Vq);
2130       gsl_vector_set_zero(q);
2131       gsl_vector_set_zero(s);
2132 
2133       cPar.CopyCvtPhen(W, y, 0);
2134 
2135       gsl_matrix_view S_mat =
2136           gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
2137       gsl_matrix_view Svar_mat =
2138           gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
2139       gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
2140 
2141       vector<size_t> vec_cat, vec_ni;
2142       vector<double> vec_weight, vec_z2;
2143 
2144       // read beta, based on the mapRS2wK
2145       ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
2146                     vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
2147                     cPar.ns_test);
2148 
2149       cout << "Study Panel: " << endl;
2150       cout << "## number of total individuals = " << cPar.ni_study << endl;
2151       cout << "## number of total SNPs/var = " << cPar.ns_study << endl;
2152       cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
2153       cout << "## number of variance components = " << cPar.n_vc << endl;
2154 
2155       // compute q
2156       Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2157             &s_vec.vector);
2158 
2159       // compute S
2160       time_start = clock();
2161       cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
2162                  &s_vec.vector);
2163       cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2164       if (cPar.error == true) {
2165         cout << "error! fail to calculate the S matrix. " << endl;
2166         return;
2167       }
2168 
2169       // compute vc estimates
2170       CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
2171                cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
2172                cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
2173                cPar.v_enrich, cPar.v_se_enrich);
2174 
2175       assert(!has_nan(cPar.v_se_pve));
2176 
2177       // if LDSC weights, then compute the weights and run the above steps again
2178       if (cPar.a_mode == 62) {
2179         // compute the weights and normalize the weights for A
2180         cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
2181 
2182         // read beta file again, and update weigths vector
2183         ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
2184                       vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
2185                       cPar.ns_test);
2186 
2187         // compute q
2188         Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2189               &s_vec.vector);
2190 
2191         // compute S
2192         time_start = clock();
2193         cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
2194                    &s_vec.vector);
2195         cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2196         if (cPar.error == true) {
2197           cout << "error! fail to calculate the S matrix. " << endl;
2198           return;
2199         }
2200 
2201         // compute vc estimates
2202         CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
2203                  cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
2204                  cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
2205                  cPar.v_enrich, cPar.v_se_enrich);
2206         assert(!has_nan(cPar.v_se_pve));
2207       }
2208 
2209 
2210       gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
2211 
2212       cPar.WriteMatrix(S, "S");
2213       cPar.WriteMatrix(Vq, "Vq");
2214       cPar.WriteVector(q, "q");
2215       cPar.WriteVector(s, "size");
2216 
2217       gsl_matrix_safe_free(S);
2218       gsl_matrix_safe_free(Vq);
2219       gsl_vector_safe_free(q);
2220       gsl_vector_safe_free(s);
2221 
2222       gsl_matrix_safe_free(A);
2223       gsl_matrix_safe_free(K);
2224       gsl_vector_safe_free(y);
2225       gsl_matrix_safe_free(W);
2226     } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
2227       if (!cPar.file_study.empty()) {
2228         string sfile = cPar.file_study + ".size.txt";
2229         CountFileLines(sfile, cPar.n_vc);
2230       } else {
2231         string file_name;
2232         igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
2233         if (!infile) {
2234           cout << "error! fail to open mstudy file: " << cPar.file_study
2235                << endl;
2236           return;
2237         }
2238 
2239         safeGetline(infile, file_name);
2240 
2241         infile.clear();
2242         infile.close();
2243 
2244         string sfile = file_name + ".size.txt";
2245         CountFileLines(sfile, cPar.n_vc);
2246       }
2247 
2248       cPar.n_vc = cPar.n_vc - 1;
2249 
2250       gsl_matrix *S = gsl_matrix_safe_alloc(2 * cPar.n_vc, cPar.n_vc);
2251       gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2252       // gsl_matrix *V=gsl_matrix_safe_alloc (cPar.n_vc+1,
2253       // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
2254       // gsl_matrix *Vslope=gsl_matrix_safe_alloc (n_lines+1,
2255       // (n_lines*(n_lines+1))/2*(n_lines+1) );
2256       gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2257       gsl_vector *s_study = gsl_vector_safe_alloc(cPar.n_vc);
2258       gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
2259       gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2260 
2261       gsl_matrix_set_zero(S);
2262       gsl_matrix_view S_mat =
2263           gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
2264       gsl_matrix_view Svar_mat =
2265           gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
2266 
2267       gsl_matrix_set_zero(Vq);
2268       // gsl_matrix_set_zero(V);
2269       // gsl_matrix_set_zero(Vslope);
2270       gsl_vector_set_zero(q);
2271       gsl_vector_set_zero(s_study);
2272       gsl_vector_set_zero(s_ref);
2273 
2274       if (!cPar.file_study.empty()) {
2275         ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
2276       } else {
2277         ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
2278       }
2279 
2280       if (!cPar.file_ref.empty()) {
2281         ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
2282                      cPar.ni_ref);
2283       } else {
2284         ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
2285                       cPar.ni_ref);
2286       }
2287 
2288       cout << "## number of variance components = " << cPar.n_vc << endl;
2289       cout << "## number of individuals in the sample = " << cPar.ni_study
2290            << endl;
2291       cout << "## number of individuals in the reference = " << cPar.ni_ref
2292            << endl;
2293 
2294       CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
2295                cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
2296                cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
2297                cPar.v_se_enrich);
2298       assert(!has_nan(cPar.v_se_pve));
2299 
2300       gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
2301       gsl_vector_safe_memcpy(&s_sub.vector, s_ref);
2302       gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
2303 
2304       cPar.WriteMatrix(S, "S");
2305       cPar.WriteMatrix(Vq, "Vq");
2306       cPar.WriteVector(q, "q");
2307       cPar.WriteVector(s, "size");
2308 
2309       gsl_matrix_safe_free(S);
2310       gsl_matrix_safe_free(Vq);
2311       // gsl_matrix_safe_free (V);
2312       // gsl_matrix_safe_free (Vslope);
2313       gsl_vector_safe_free(q);
2314       gsl_vector_safe_free(s_study);
2315       gsl_vector_safe_free(s_ref);
2316       gsl_vector_safe_free(s);
2317     } else {
2318       gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2319       gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2320       gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1 * cPar.n_vc);
2321 
2322       // set covariates matrix W and phenotype matrix Y
2323       // an intercept should be included in W,
2324       cPar.CopyCvtPhen(W, Y, 0);
2325 
2326       // read kinship matrices
2327       if (!(cPar.file_mk).empty()) {
2328         ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
2329                     cPar.k_mode, cPar.error, G);
2330         if (cPar.error == true) {
2331           cout << "error! fail to read kinship/relatedness file. " << endl;
2332           return;
2333         }
2334 
2335         // center matrix G, and obtain v_traceG
2336         double d = 0;
2337         (cPar.v_traceG).clear();
2338         for (size_t i = 0; i < cPar.n_vc; i++) {
2339           gsl_matrix_view G_sub =
2340               gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
2341           CenterMatrix(&G_sub.matrix);
2342           d = 0;
2343           for (size_t j = 0; j < G->size1; j++) {
2344             d += gsl_matrix_get(&G_sub.matrix, j, j);
2345           }
2346           d /= (double)G->size1;
2347           (cPar.v_traceG).push_back(d);
2348         }
2349       } else if (!(cPar.file_kin).empty()) {
2350         ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2351                      cPar.k_mode, cPar.error, G);
2352         if (cPar.error == true) {
2353           cout << "error! fail to read kinship/relatedness file. " << endl;
2354           return;
2355         }
2356 
2357         // center matrix G
2358         CenterMatrix(G);
2359         validate_K(G);
2360 
2361         (cPar.v_traceG).clear();
2362         double d = 0;
2363         for (size_t j = 0; j < G->size1; j++) {
2364           d += gsl_matrix_get(G, j, j);
2365         }
2366         d /= (double)G->size1;
2367         (cPar.v_traceG).push_back(d);
2368       }
2369       // fit multiple variance components
2370       if (cPar.n_ph == 1) {
2371         //		  if (cPar.n_vc==1) {
2372         //		  } else {
2373         gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2374         VC cVc;
2375         cVc.CopyFromParam(cPar);
2376         if (cPar.a_mode == 61) {
2377           cVc.CalcVChe(G, W, &Y_col.vector);
2378         } else if (cPar.a_mode == 62) {
2379           cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
2380         } else {
2381           cVc.CalcVCacl(G, W, &Y_col.vector);
2382         }
2383         cVc.CopyToParam(cPar);
2384         // obtain pve from sigma2
2385         // obtain se_pve from se_sigma2
2386 
2387         //}
2388       }
2389     }
2390   }
2391 
2392   // compute confidence intervals with additional summary statistics
2393   // we do not check the sign of z-scores here, but they have to be matched with
2394   // the genotypes
2395   if (cPar.a_mode == 66 || cPar.a_mode == 67) {
2396     // read reference file first
2397     gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2398     gsl_matrix *Svar = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2399     gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
2400 
2401     gsl_matrix_set_zero(S);
2402     gsl_matrix_set_zero(Svar);
2403     gsl_vector_set_zero(s_ref);
2404 
2405     if (!cPar.file_ref.empty()) {
2406       ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
2407     } else {
2408       ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
2409     }
2410 
2411     // need to obtain a common set of SNPs between beta file and the genotype
2412     // file; these are saved in mapRS2wA and mapRS2wK
2413     // normalize the weight in mapRS2wK to have an average of one; each element
2414     // of mapRS2wA is 1
2415     set<string> setSnps_beta;
2416     ReadFile_snps_header(cPar.file_beta, setSnps_beta);
2417 
2418     // obtain the weights for wA, which contains the SNP weights for SNPs used
2419     // in the model
2420     map<string, double> mapRS2wK;
2421     cPar.ObtainWeight(setSnps_beta, mapRS2wK);
2422 
2423     // set up matrices and vector
2424     gsl_matrix *Xz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
2425     gsl_matrix *XWz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
2426     gsl_matrix *XtXWz =
2427         gsl_matrix_safe_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
2428     gsl_vector *w = gsl_vector_safe_alloc(mapRS2wK.size());
2429     gsl_vector *w1 = gsl_vector_safe_alloc(mapRS2wK.size());
2430     gsl_vector *z = gsl_vector_safe_alloc(mapRS2wK.size());
2431     gsl_vector *s_vec = gsl_vector_safe_alloc(cPar.n_vc);
2432 
2433     vector<size_t> vec_cat, vec_size;
2434     vector<double> vec_z;
2435 
2436     map<string, double> mapRS2z, mapRS2wA;
2437     map<string, string> mapRS2A1;
2438     string file_str;
2439 
2440     // update s_vec, the number of snps in each category
2441     for (size_t i = 0; i < cPar.n_vc; i++) {
2442       vec_size.push_back(0);
2443     }
2444 
2445     for (map<string, double>::const_iterator it = mapRS2wK.begin();
2446          it != mapRS2wK.end(); ++it) {
2447       vec_size[cPar.mapRS2cat[it->first]]++;
2448     }
2449 
2450     for (size_t i = 0; i < cPar.n_vc; i++) {
2451       gsl_vector_set(s_vec, i, vec_size[i]);
2452     }
2453 
2454     // update mapRS2wA using v_pve and s_vec
2455     if (cPar.a_mode == 66) {
2456       for (map<string, double>::const_iterator it = mapRS2wK.begin();
2457            it != mapRS2wK.end(); ++it) {
2458         mapRS2wA[it->first] = 1;
2459       }
2460     } else {
2461       cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
2462     }
2463 
2464     // read in z-scores based on allele 0, and save that into a vector
2465     ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
2466 
2467     // update snp indicator, save weights to w, save z-scores to vec_z, save
2468     // category label to vec_cat
2469     // sign of z is determined by matching alleles
2470     cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
2471 
2472     // compute an n by k matrix of X_iWz
2473     cout << "Calculating Xz ... " << endl;
2474 
2475     gsl_matrix_set_zero(Xz);
2476     gsl_vector_set_all(w1, 1);
2477 
2478     if (!cPar.file_bfile.empty()) {
2479       file_str = cPar.file_bfile + ".bed";
2480       PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2481                vec_cat, w1, z, 0, Xz);
2482     } else if (!cPar.file_geno.empty()) {
2483       BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2484                 cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
2485     } else if (!cPar.file_mbfile.empty()) {
2486       MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2487                cPar.mindicator_snp, vec_cat, w1, z, Xz);
2488     } else if (!cPar.file_mgeno.empty()) {
2489       MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2490                cPar.mindicator_snp, vec_cat, w1, z, Xz);
2491     }
2492     if (cPar.a_mode == 66) {
2493       gsl_matrix_safe_memcpy(XWz, Xz);
2494     } else if (cPar.a_mode == 67) {
2495       cout << "Calculating XWz ... " << endl;
2496 
2497       gsl_matrix_set_zero(XWz);
2498 
2499       if (!cPar.file_bfile.empty()) {
2500         file_str = cPar.file_bfile + ".bed";
2501         PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2502                  vec_cat, w, z, 0, XWz);
2503       } else if (!cPar.file_geno.empty()) {
2504         BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2505                   cPar.indicator_snp, vec_cat, w, z, 0, XWz);
2506       } else if (!cPar.file_mbfile.empty()) {
2507         MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2508                  cPar.mindicator_snp, vec_cat, w, z, XWz);
2509       } else if (!cPar.file_mgeno.empty()) {
2510         MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2511                  cPar.mindicator_snp, vec_cat, w, z, XWz);
2512       }
2513     }
2514     // compute an p by k matrix of X_j^TWX_iWz
2515     cout << "Calculating XtXWz ... " << endl;
2516     gsl_matrix_set_zero(XtXWz);
2517 
2518     if (!cPar.file_bfile.empty()) {
2519       file_str = cPar.file_bfile + ".bed";
2520       PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2521                  XWz, 0, XtXWz);
2522     } else if (!cPar.file_geno.empty()) {
2523       BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2524                   cPar.indicator_snp, XWz, 0, XtXWz);
2525     } else if (!cPar.file_mbfile.empty()) {
2526       MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2527                  cPar.mindicator_snp, XWz, XtXWz);
2528     } else if (!cPar.file_mgeno.empty()) {
2529       MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2530                  cPar.mindicator_snp, XWz, XtXWz);
2531     }
2532     // compute confidence intervals
2533     CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
2534              cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
2535              cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
2536     assert(!has_nan(cPar.v_se_pve));
2537 
2538     gsl_matrix_safe_free(S);
2539     gsl_matrix_safe_free(Svar);
2540     gsl_vector_safe_free(s_ref);
2541 
2542     gsl_matrix_safe_free(Xz);
2543     gsl_matrix_safe_free(XWz);
2544     gsl_matrix_safe_free(XtXWz);
2545     gsl_vector_safe_free(w);
2546     gsl_vector_safe_free(w1);
2547     gsl_vector_safe_free(z);
2548     gsl_vector_safe_free(s_vec);
2549   }
2550 
2551   // LMM or mvLMM or Eigen-Decomposition
2552   if (cPar.a_mode == M_LMM1 || cPar.a_mode == M_LMM2 || cPar.a_mode == M_LMM3 ||
2553       cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 ||
2554       cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen
2555     gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2556     enforce_msg(Y, "allocate Y"); // just to be sure
2557     gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2558     gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
2559                                                           // matrix
2560     gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
2561     gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
2562     gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
2563     gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
2564     gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
2565     gsl_vector *eval = gsl_vector_calloc(Y->size1);
2566     gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
2567     gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
2568     assert_issue(is_issue(26), UtY->data[0] == 0.0);
2569 
2570     // set covariates matrix W and phenotype matrix Y
2571     // an intercept should be included in W,
2572     cPar.CopyCvtPhen(W, Y, 0);
2573     if (!cPar.file_gxe.empty()) {
2574       cPar.CopyGxe(env);
2575     }
2576 
2577     // read relatedness matrix G
2578     if (!(cPar.file_kin).empty()) {
2579       ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2580                    cPar.k_mode, cPar.error, G);
2581       if (cPar.error == true) {
2582         cout << "error! fail to read kinship/relatedness file. " << endl;
2583         return;
2584       }
2585 
2586       // center matrix G
2587       CenterMatrix(G);
2588       validate_K(G);
2589 
2590       // is residual weights are provided, then
2591       if (!cPar.file_weight.empty()) {
2592         cPar.CopyWeight(weight);
2593         double d, wi, wj;
2594         for (size_t i = 0; i < G->size1; i++) {
2595           wi = gsl_vector_get(weight, i);
2596           for (size_t j = i; j < G->size2; j++) {
2597             wj = gsl_vector_get(weight, j);
2598             d = gsl_matrix_get(G, i, j);
2599             if (wi <= 0 || wj <= 0) {
2600               d = 0;
2601             } else {
2602               d /= safe_sqrt(wi * wj);
2603             }
2604             gsl_matrix_set(G, i, j, d);
2605             if (j != i) {
2606               gsl_matrix_set(G, j, i, d);
2607             }
2608           }
2609         }
2610       }
2611 
2612       // eigen-decomposition and calculate trace_G
2613       cout << "Start Eigen-Decomposition..." << endl;
2614       time_start = clock();
2615 
2616       if (cPar.a_mode == M_EIGEN) {
2617         cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 1);
2618       } else {
2619         cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
2620       }
2621       // write(eval,"eval");
2622 
2623       if (!cPar.file_weight.empty()) {
2624         double wi;
2625         for (size_t i = 0; i < U->size1; i++) {
2626           wi = gsl_vector_get(weight, i);
2627           if (wi <= 0) {
2628             wi = 0;
2629           } else {
2630             wi = safe_sqrt(wi);
2631           }
2632           gsl_vector_view Urow = gsl_matrix_row(U, i);
2633           gsl_vector_scale(&Urow.vector, wi);
2634         }
2635       }
2636 
2637       cPar.time_eigen =
2638           (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2639     } else {
2640       ReadFile_eigenU(cPar.file_ku, cPar.error, U);
2641       if (cPar.error == true) {
2642         cout << "error! fail to read the U file. " << endl;
2643         return;
2644       }
2645 
2646       ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
2647       if (cPar.error == true) {
2648         cout << "error! fail to read the D file. " << endl;
2649         return;
2650       }
2651 
2652       cPar.trace_G = 0.0;
2653       for (size_t i = 0; i < eval->size; i++) {
2654         if (gsl_vector_get(eval, i) < 1e-10) {
2655           gsl_vector_set(eval, i, 0);
2656         }
2657         cPar.trace_G += gsl_vector_get(eval, i);
2658       }
2659       cPar.trace_G /= (double)eval->size;
2660     }
2661     // write(eval,"eval2");
2662 
2663     if (cPar.a_mode == M_EIGEN) {
2664       cPar.WriteMatrix(U, "eigenU");
2665       cPar.WriteVector(eval, "eigenD");
2666     } else if (!cPar.file_gene.empty()) {
2667       // calculate UtW and Uty
2668       CalcUtX(U, W, UtW);
2669       CalcUtX(U, Y, UtY);
2670 
2671       assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2672 
2673       LMM cLmm;
2674       cLmm.CopyFromParam(cPar);
2675 
2676       gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2677       gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2678 
2679       cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
2680                        &Y_col.vector); // y is the predictor, not the phenotype
2681 
2682       cLmm.WriteFiles();
2683       cLmm.CopyToParam(cPar);
2684     } else {
2685       // calculate UtW and Uty
2686       CalcUtX(U, W, UtW);
2687       CalcUtX(U, Y, UtY);
2688       assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2689 
2690       // calculate REMLE/MLE estimate and pve for univariate model
2691       if (cPar.n_ph == 1) { // one phenotype
2692         gsl_vector_view beta = gsl_matrix_row(B, 0);
2693         gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
2694         gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2695 
2696         assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2697 
2698         CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
2699                    cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
2700         assert(!isnan(UtY->data[0]));
2701 
2702         CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
2703                         cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
2704                         &se_beta.vector);
2705 
2706         assert(!isnan(UtY->data[0]));
2707 
2708         cPar.beta_mle_null.clear();
2709         cPar.se_beta_mle_null.clear();
2710         assert(!isnan(B->data[0]));
2711         assert(!isnan(se_B->data[0]));
2712         for (size_t i = 0; i < B->size2; i++) {
2713           cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
2714           cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
2715         }
2716         assert(!isnan(UtY->data[0]));
2717         assert(!isnan(cPar.beta_mle_null.front()));
2718         assert(!isnan(cPar.se_beta_mle_null.front()));
2719 
2720         // the following functions do not modify eval
2721         CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
2722                    cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
2723         CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
2724                         cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
2725                         &se_beta.vector);
2726 
2727         cPar.beta_remle_null.clear();
2728         cPar.se_beta_remle_null.clear();
2729         assert(!isnan(B->data[0]));
2730         assert(!isnan(se_B->data[0]));
2731 
2732         for (size_t i = 0; i < B->size2; i++) {
2733           cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
2734           cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
2735         }
2736 
2737         CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
2738                 cPar.pve_null, cPar.pve_se_null);
2739         cPar.PrintSummary();
2740 
2741         // calculate and output residuals
2742         if (cPar.a_mode == M_LMM5) {
2743           gsl_vector *Utu_hat = gsl_vector_safe_alloc(Y->size1);
2744           gsl_vector *Ute_hat = gsl_vector_safe_alloc(Y->size1);
2745           gsl_vector *u_hat = gsl_vector_safe_alloc(Y->size1);
2746           gsl_vector *e_hat = gsl_vector_safe_alloc(Y->size1);
2747           gsl_vector *y_hat = gsl_vector_safe_alloc(Y->size1);
2748 
2749           // obtain Utu and Ute
2750           gsl_vector_safe_memcpy(y_hat, &UtY_col.vector);
2751           gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
2752 
2753           double d, u, e;
2754           for (size_t i = 0; i < eval->size; i++) {
2755             d = gsl_vector_get(eval, i);
2756             u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
2757                 gsl_vector_get(y_hat, i);
2758             e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
2759             gsl_vector_set(Utu_hat, i, u);
2760             gsl_vector_set(Ute_hat, i, e);
2761           }
2762 
2763           // obtain u and e
2764           gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
2765           gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
2766 
2767           // output residuals
2768           cPar.WriteVector(u_hat, "residU");
2769           cPar.WriteVector(e_hat, "residE");
2770 
2771           gsl_vector_safe_free(u_hat);
2772           gsl_vector_safe_free(e_hat);
2773           gsl_vector_safe_free(y_hat);
2774         }
2775       }
2776 
2777       // Fit LMM or mvLMM (w. LOCO)
2778       if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
2779           cPar.a_mode == 4) {
2780         if (cPar.n_ph == 1) {
2781           LMM cLmm;
2782           cLmm.CopyFromParam(cPar);
2783 
2784           // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
2785 
2786           gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2787           gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2788 
2789           if (!cPar.file_bfile.empty()) {
2790             // PLINK analysis
2791             if (cPar.file_gxe.empty()) {
2792               cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
2793                                 &Y_col.vector, cPar.setGWASnps);
2794             }
2795             else {
2796               cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
2797                                    &Y_col.vector, env);
2798             }
2799           }
2800           else {
2801             // BIMBAM analysis
2802 
2803             if (cPar.file_gxe.empty()) {
2804               cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
2805                                  &Y_col.vector, cPar.setGWASnps);
2806             } else {
2807               cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
2808                                     &Y_col.vector, env);
2809             }
2810           }
2811           cLmm.WriteFiles();
2812           cLmm.CopyToParam(cPar);
2813         } else {
2814           MVLMM cMvlmm;
2815           cMvlmm.CopyFromParam(cPar);
2816 
2817           // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking
2818 
2819           // write(eval,"eval3");
2820 
2821           if (!cPar.file_bfile.empty()) {
2822             if (cPar.file_gxe.empty()) {
2823               cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
2824             } else {
2825               cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
2826             }
2827           } else {
2828             if (cPar.file_gxe.empty()) {
2829               cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
2830             } else {
2831               cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
2832             }
2833           }
2834 
2835           cMvlmm.WriteFiles();
2836           cMvlmm.CopyToParam(cPar);
2837         }
2838       }
2839     }
2840 
2841     // release all matrices and vectors
2842     gsl_matrix_safe_free(Y);
2843     gsl_matrix_safe_free(W);
2844     gsl_matrix_warn_free(B); // sometimes unused
2845     gsl_matrix_warn_free(se_B);
2846     gsl_matrix_warn_free(G);
2847     gsl_matrix_safe_free(U);
2848     gsl_matrix_safe_free(UtW);
2849     gsl_matrix_safe_free(UtY);
2850     gsl_vector_safe_free(eval);
2851     gsl_vector_free(env); // sometimes unused
2852   }
2853 
2854   // BSLMM
2855   if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
2856     gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2857     gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
2858     gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
2859     gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
2860 
2861     // set covariates matrix W and phenotype vector y
2862     // an intercept should be included in W,
2863     cPar.CopyCvtPhen(W, y, 0);
2864 
2865     // center y, even for case/control data
2866     cPar.pheno_mean = CenterVector(y);
2867 
2868     // run bvsr if rho==1
2869     if (cPar.rho_min == 1 && cPar.rho_max == 1) {
2870       // read genotypes X (not UtX)
2871       cPar.ReadGenotypes(UtX, G, false);
2872 
2873       // perform BSLMM analysis
2874       BSLMM cBslmm;
2875       cBslmm.CopyFromParam(cPar);
2876       time_start = clock();
2877       cBslmm.MCMC(UtX, y);
2878       cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2879       cBslmm.CopyToParam(cPar);
2880       // else, if rho!=1
2881     } else {
2882       gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
2883       gsl_vector *eval = gsl_vector_safe_alloc(y->size);
2884       gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
2885       gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
2886 
2887       // read relatedness matrix G
2888       if (!(cPar.file_kin).empty()) {
2889         cPar.ReadGenotypes(UtX, G, false);
2890 
2891         // read relatedness matrix G
2892         ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2893                      cPar.k_mode, cPar.error, G);
2894         if (cPar.error == true) {
2895           cout << "error! fail to read kinship/relatedness file. " << endl;
2896           return;
2897         }
2898 
2899         // center matrix G
2900         CenterMatrix(G);
2901         validate_K(G);
2902       } else {
2903         cPar.ReadGenotypes(UtX, G, true);
2904       }
2905 
2906       // eigen-decomposition and calculate trace_G
2907       cout << "Start Eigen-Decomposition..." << endl;
2908       time_start = clock();
2909 
2910       cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
2911 
2912       cPar.time_eigen =
2913           (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2914 
2915       // calculate UtW and Uty
2916       CalcUtX(U, W, UtW);
2917       CalcUtX(U, y, Uty);
2918 
2919       // calculate REMLE/MLE estimate and pve
2920       CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
2921                  cPar.l_mle_null, cPar.logl_mle_H0);
2922       CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
2923                  cPar.l_remle_null, cPar.logl_remle_H0);
2924       CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
2925               cPar.pve_se_null);
2926 
2927       cPar.PrintSummary();
2928 
2929       // Creat and calcualte UtX, use a large memory
2930       cout << "Calculating UtX..." << endl;
2931       time_start = clock();
2932       CalcUtX(U, UtX);
2933       cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2934 
2935       // perform BSLMM or BSLMMDAP analysis
2936       if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
2937         BSLMM cBslmm;
2938         cBslmm.CopyFromParam(cPar);
2939         time_start = clock();
2940         if (cPar.a_mode == 12) { // ridge regression
2941           cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
2942         } else { // Run MCMC
2943           cBslmm.MCMC(U, UtX, Uty, eval, y);
2944         }
2945         cPar.time_opt =
2946             (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2947         cBslmm.CopyToParam(cPar);
2948       } else {
2949       }
2950 
2951       // release all matrices and vectors
2952       gsl_matrix_safe_free(G);
2953       gsl_matrix_safe_free(U);
2954       gsl_matrix_safe_free(UtW);
2955       gsl_vector_safe_free(eval);
2956       gsl_vector_safe_free(Uty);
2957     }
2958     gsl_matrix_safe_free(W);
2959     gsl_vector_safe_free(y);
2960     gsl_matrix_safe_free(UtX);
2961   }
2962 
2963   // BSLMM-DAP
2964   if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
2965     if (cPar.a_mode == 14) {
2966       gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2967       gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
2968       gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
2969       gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
2970 
2971       // set covariates matrix W and phenotype vector y
2972       // an intercept should be included in W,
2973       cPar.CopyCvtPhen(W, y, 0);
2974 
2975       // center y, even for case/control data
2976       cPar.pheno_mean = CenterVector(y);
2977 
2978       // run bvsr if rho==1
2979       if (cPar.rho_min == 1 && cPar.rho_max == 1) {
2980         // read genotypes X (not UtX)
2981         cPar.ReadGenotypes(UtX, G, false);
2982 
2983         // perform BSLMM analysis
2984         BSLMM cBslmm;
2985         cBslmm.CopyFromParam(cPar);
2986         time_start = clock();
2987         cBslmm.MCMC(UtX, y);
2988         cPar.time_opt =
2989             (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2990         cBslmm.CopyToParam(cPar);
2991         // else, if rho!=1
2992       } else {
2993         gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
2994         gsl_vector *eval = gsl_vector_safe_alloc(y->size);
2995         gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
2996         gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
2997 
2998         // read relatedness matrix G
2999         if (!(cPar.file_kin).empty()) {
3000           cPar.ReadGenotypes(UtX, G, false);
3001 
3002           // read relatedness matrix G
3003           ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
3004                        cPar.k_mode, cPar.error, G);
3005           if (cPar.error == true) {
3006             cout << "error! fail to read kinship/relatedness file. " << endl;
3007             return;
3008           }
3009 
3010           // center matrix G
3011           CenterMatrix(G);
3012           validate_K(G);
3013 
3014         } else {
3015           cPar.ReadGenotypes(UtX, G, true);
3016         }
3017 
3018         // eigen-decomposition and calculate trace_G
3019         cout << "Start Eigen-Decomposition..." << endl;
3020         time_start = clock();
3021         cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
3022         cPar.time_eigen =
3023             (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3024 
3025         // calculate UtW and Uty
3026         CalcUtX(U, W, UtW);
3027         CalcUtX(U, y, Uty);
3028 
3029         // calculate REMLE/MLE estimate and pve
3030         CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
3031                    cPar.l_mle_null, cPar.logl_mle_H0);
3032         CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
3033                    cPar.l_remle_null, cPar.logl_remle_H0);
3034         CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
3035                 cPar.pve_se_null);
3036 
3037         cPar.PrintSummary();
3038 
3039         // Creat and calcualte UtX, use a large memory
3040         cout << "Calculating UtX..." << endl;
3041         time_start = clock();
3042         CalcUtX(U, UtX);
3043         cPar.time_UtX =
3044             (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3045 
3046         // perform analysis; assume X and y are already centered
3047         BSLMMDAP cBslmmDap;
3048         cBslmmDap.CopyFromParam(cPar);
3049         time_start = clock();
3050         cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
3051         cPar.time_opt =
3052             (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3053         cBslmmDap.CopyToParam(cPar);
3054 
3055         // release all matrices and vectors
3056         gsl_matrix_safe_free(G);
3057         gsl_matrix_safe_free(U);
3058         gsl_matrix_safe_free(UtW);
3059         gsl_vector_safe_free(eval);
3060         gsl_vector_safe_free(Uty);
3061       }
3062 
3063       gsl_matrix_safe_free(W);
3064       gsl_vector_safe_free(y);
3065       gsl_matrix_safe_free(UtX);
3066     } else if (cPar.a_mode == 15) {
3067       // perform EM algorithm and estimate parameters
3068       vector<string> vec_rs;
3069       vector<double> vec_sa2, vec_sb2, wab;
3070       vector<vector<vector<double>>> BF;
3071 
3072       // read hyp and bf files (functions defined in BSLMMDAP)
3073       ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
3074       ReadFile_bf(cPar.file_bf, vec_rs, BF);
3075 
3076       cPar.ns_test = vec_rs.size();
3077       if (wab.size() != BF[0][0].size()) {
3078         cout << "error! hyp and bf files dimension do not match" << endl;
3079       }
3080 
3081       // load annotations
3082       gsl_matrix *Ac = NULL;
3083       gsl_matrix_int *Ad = NULL;
3084       gsl_vector_int *dlevel = NULL;
3085       size_t kc, kd;
3086       if (!cPar.file_cat.empty()) {
3087         ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
3088       } else {
3089         kc = 0;
3090         kd = 0;
3091       }
3092 
3093       cout << "## number of blocks = " << BF.size() << endl;
3094       cout << "## number of analyzed SNPs/var = " << vec_rs.size() << endl;
3095       cout << "## grid size for hyperparameters = " << wab.size() << endl;
3096       cout << "## number of continuous annotations = " << kc << endl;
3097       cout << "## number of discrete annotations = " << kd << endl;
3098 
3099       // DAP_EstimateHyper (const size_t kc, const size_t kd, const
3100       // vector<string> &vec_rs, const vector<double> &vec_sa2, const
3101       // vector<double> &vec_sb2, const vector<double> &wab, const
3102       // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int
3103       // *Ad, gsl_vector_int *dlevel);
3104 
3105       // perform analysis
3106       BSLMMDAP cBslmmDap;
3107       cBslmmDap.CopyFromParam(cPar);
3108       time_start = clock();
3109       cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
3110                                   Ad, dlevel);
3111       cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3112       cBslmmDap.CopyToParam(cPar);
3113 
3114       gsl_matrix_safe_free(Ac);
3115       gsl_matrix_int_free(Ad);
3116       gsl_vector_int_free(dlevel);
3117     } else {
3118       //
3119     }
3120   }
3121 
3122   cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0);
3123 
3124   return;
3125 }
3126 
3127 // #include "Eigen/Dense"
3128 
WriteLog(int argc,char ** argv,PARAM & cPar)3129 void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
3130   string file_str;
3131   file_str = cPar.path_out + "/" + cPar.file_out;
3132   file_str += ".log.txt";
3133 
3134   ofstream outfile(file_str.c_str(), ofstream::out);
3135   if (!outfile) {
3136     cout << "error writing log file: " << file_str.c_str() << endl;
3137     return;
3138   }
3139 
3140   outfile << "##" << endl;
3141   outfile << "## GEMMA Version    = " << version << " (" << date << ")" << endl;
3142   outfile << "## Build profile    = " << GEMMA_PROFILE << endl ;
3143   #ifdef __GNUC__
3144   outfile << "## GCC version      = " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ << endl;
3145   #endif
3146   outfile << "## GSL Version      = " << GSL_VERSION << endl;
3147   // outfile << "## Eigen Version    = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
3148 #ifdef OPENBLAS
3149 
3150   #ifndef OPENBLAS_LEGACY
3151   //outfile << "## OpenBlas         =" << OPENBLAS_VERSION << " - " << openblas_get_config() << endl;
3152   outfile << "##   arch           = " << openblas_get_corename() << endl;
3153   outfile << "##   threads        = " << openblas_get_num_threads() << endl;
3154   #else
3155   outfile << "## OpenBlas         = " << openblas_get_config() << endl;
3156   #endif
3157   string* pStr = new string[4] { "sequential", "threaded", "openmp" };
3158   outfile << "##   parallel type  = " << pStr[openblas_get_parallel()] << endl;
3159 #endif
3160 
3161   outfile << "##" << endl;
3162   outfile << "## Command Line Input = ";
3163   for (int i = 0; i < argc; i++) {
3164     outfile << argv[i] << " ";
3165   }
3166   outfile << endl;
3167 
3168   outfile << "##" << endl;
3169   time_t rawtime;
3170   time(&rawtime);
3171   tm *ptm = localtime(&rawtime);
3172 
3173   outfile << "## Date = " << asctime(ptm);
3174 
3175   outfile << "##" << endl;
3176   outfile << "## Summary Statistics:" << endl;
3177   if (!cPar.file_cor.empty() || !cPar.file_study.empty() ||
3178       !cPar.file_mstudy.empty()) {
3179     outfile << "## number of total individuals in the sample = "
3180             << cPar.ni_study << endl;
3181     outfile << "## number of total individuals in the reference = "
3182             << cPar.ni_ref << endl;
3183     outfile << "## number of variance components = " << cPar.n_vc << endl;
3184 
3185     outfile << "## pve estimates = ";
3186     for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3187       outfile << "  " << cPar.v_pve[i];
3188     }
3189     outfile << endl;
3190 
3191     outfile << "## se(pve) = ";
3192     for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3193       outfile << "  " << cPar.v_se_pve[i];
3194     }
3195     outfile << endl;
3196     assert(!has_nan(cPar.v_se_pve));
3197 
3198     if (cPar.n_vc > 1) {
3199       outfile << "## total pve = " << cPar.pve_total << endl;
3200       outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3201     }
3202 
3203     outfile << "## sigma2 per snp = ";
3204     for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3205       outfile << "  " << cPar.v_sigma2[i];
3206     }
3207     outfile << endl;
3208 
3209     outfile << "## se(sigma2 per snp) = ";
3210     for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3211       outfile << "  " << cPar.v_se_sigma2[i];
3212     }
3213     outfile << endl;
3214 
3215     outfile << "## enrichment = ";
3216     for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3217       outfile << "  " << cPar.v_enrich[i];
3218     }
3219     outfile << endl;
3220 
3221     outfile << "## se(enrichment) = ";
3222     for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3223       outfile << "  " << cPar.v_se_enrich[i];
3224     }
3225     outfile << endl;
3226   } else if (!cPar.file_beta.empty() &&
3227              (cPar.a_mode == 61 || cPar.a_mode == 62)) {
3228     outfile << "## number of total individuals in the sample = "
3229             << cPar.ni_study << endl;
3230     outfile << "## number of total individuals in the reference = "
3231             << cPar.ni_total << endl;
3232     outfile << "## number of total SNPs/var in the sample = " << cPar.ns_study
3233             << endl;
3234     outfile << "## number of total SNPs/var in the reference panel = "
3235             << cPar.ns_total << endl;
3236     outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3237     outfile << "## number of variance components = " << cPar.n_vc << endl;
3238   } else if (!cPar.file_beta.empty() &&
3239              (cPar.a_mode == 66 || cPar.a_mode == 67)) {
3240     outfile << "## number of total individuals in the sample = "
3241             << cPar.ni_total << endl;
3242     outfile << "## number of total individuals in the reference = "
3243             << cPar.ni_ref << endl;
3244     outfile << "## number of total SNPs/var in the sample = " << cPar.ns_total
3245             << endl;
3246     outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3247     outfile << "## number of variance components = " << cPar.n_vc << endl;
3248 
3249     outfile << "## pve estimates = ";
3250     for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3251       outfile << "  " << cPar.v_pve[i];
3252     }
3253     outfile << endl;
3254 
3255     outfile << "## se(pve) = ";
3256     for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3257       outfile << "  " << cPar.v_se_pve[i];
3258     }
3259     outfile << endl;
3260 
3261     if (cPar.n_vc > 1) {
3262       outfile << "## total pve = " << cPar.pve_total << endl;
3263       outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3264     }
3265 
3266     outfile << "## sigma2 per snp = ";
3267     for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3268       outfile << "  " << cPar.v_sigma2[i];
3269     }
3270     outfile << endl;
3271 
3272     outfile << "## se(sigma2 per snp) = ";
3273     for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3274       outfile << "  " << cPar.v_se_sigma2[i];
3275     }
3276     outfile << endl;
3277 
3278     outfile << "## enrichment = ";
3279     for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3280       outfile << "  " << cPar.v_enrich[i];
3281     }
3282     outfile << endl;
3283 
3284     outfile << "## se(enrichment) = ";
3285     for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3286       outfile << "  " << cPar.v_se_enrich[i];
3287     }
3288     outfile << endl;
3289   } else {
3290     outfile << "## number of total individuals = " << cPar.ni_total << endl;
3291 
3292     if (cPar.a_mode == 43) {
3293       outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
3294       outfile << "## number of individuals with full phenotypes = "
3295               << cPar.ni_test << endl;
3296     } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
3297       outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
3298     }
3299 
3300     outfile << "## number of covariates = " << cPar.n_cvt << endl;
3301     outfile << "## number of phenotypes = " << cPar.n_ph << endl;
3302     if (cPar.a_mode == 43) {
3303       outfile << "## number of observed data = " << cPar.np_obs << endl;
3304       outfile << "## number of missing data = " << cPar.np_miss << endl;
3305     }
3306     if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
3307         cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
3308         cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
3309       outfile << "## number of variance components = " << cPar.n_vc << endl;
3310     }
3311 
3312     if (!(cPar.file_gene).empty()) {
3313       outfile << "## number of total genes = " << cPar.ng_total << endl;
3314       outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
3315     } else if (cPar.file_epm.empty()) {
3316       outfile << "## number of total SNPs/var = " << cPar.ns_total << endl;
3317       outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3318     } else {
3319       outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3320     }
3321 
3322     if (cPar.a_mode == 13) {
3323       outfile << "## number of cases = " << cPar.ni_case << endl;
3324       outfile << "## number of controls = " << cPar.ni_control << endl;
3325     }
3326   }
3327 
3328   if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
3329       cPar.file_cor.empty() && cPar.file_study.empty() &&
3330       cPar.file_mstudy.empty()) {
3331     //	        outfile<<"## REMLE log-likelihood in the null model =
3332     //"<<cPar.logl_remle_H0<<endl;
3333     if (cPar.n_ph == 1) {
3334       outfile << "## pve estimates = ";
3335       for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3336         outfile << "  " << cPar.v_pve[i];
3337       }
3338       outfile << endl;
3339 
3340       outfile << "## se(pve) = ";
3341       for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3342         outfile << "  " << cPar.v_se_pve[i];
3343       }
3344       outfile << endl;
3345 
3346       if (cPar.n_vc > 1) {
3347         outfile << "## total pve = " << cPar.pve_total << endl;
3348         outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3349       }
3350 
3351       outfile << "## sigma2 estimates = ";
3352       for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3353         outfile << "  " << cPar.v_sigma2[i];
3354       }
3355       outfile << endl;
3356 
3357       outfile << "## se(sigma2) = ";
3358       for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3359         outfile << "  " << cPar.v_se_sigma2[i];
3360       }
3361       outfile << endl;
3362 
3363       if (!cPar.file_beta.empty()) {
3364         outfile << "## enrichment = ";
3365         for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3366           outfile << "  " << cPar.v_enrich[i];
3367         }
3368         outfile << endl;
3369 
3370         outfile << "## se(enrichment) = ";
3371         for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3372           outfile << "  " << cPar.v_se_enrich[i];
3373         }
3374         outfile << endl;
3375       }
3376     }
3377   }
3378 
3379   if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
3380       cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
3381       cPar.a_mode == 12 || cPar.a_mode == 13) {
3382     outfile << "## REMLE log-likelihood in the null model = "
3383             << cPar.logl_remle_H0 << endl;
3384     outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0
3385             << endl;
3386     if (cPar.n_ph == 1) {
3387       // outfile<<"## lambda REMLE estimate in the null (linear mixed) model =
3388       // "<<cPar.l_remle_null<<endl;
3389       // outfile<<"## lambda MLE estimate in the null (linear mixed) model =
3390       // "<<cPar.l_mle_null<<endl;
3391       outfile << "## pve estimate in the null model = " << cPar.pve_null
3392               << endl;
3393       outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl;
3394       outfile << "## vg estimate in the null model = " << cPar.vg_remle_null
3395               << endl;
3396       outfile << "## ve estimate in the null model = " << cPar.ve_remle_null
3397               << endl;
3398       outfile << "## beta estimate in the null model = ";
3399       for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) {
3400         outfile << "  " << cPar.beta_remle_null[i];
3401       }
3402       outfile << endl;
3403       outfile << "## se(beta) = ";
3404       for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) {
3405         outfile << "  " << cPar.se_beta_remle_null[i];
3406       }
3407       outfile << endl;
3408 
3409     } else {
3410       size_t c;
3411       outfile << "## REMLE estimate for Vg in the null model: " << endl;
3412       for (size_t i = 0; i < cPar.n_ph; i++) {
3413         for (size_t j = 0; j <= i; j++) {
3414           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3415               min(i, j);
3416           outfile << tab(j) << cPar.Vg_remle_null[c];
3417         }
3418         outfile << endl;
3419       }
3420       outfile << "## se(Vg): " << endl;
3421       for (size_t i = 0; i < cPar.n_ph; i++) {
3422         for (size_t j = 0; j <= i; j++) {
3423           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3424               min(i, j);
3425           outfile << tab(j) << safe_sqrt(cPar.VVg_remle_null[c]);
3426         }
3427         outfile << endl;
3428       }
3429       outfile << "## REMLE estimate for Ve in the null model: " << endl;
3430       for (size_t i = 0; i < cPar.n_ph; i++) {
3431         for (size_t j = 0; j <= i; j++) {
3432           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3433               min(i, j);
3434           outfile << tab(j) << cPar.Ve_remle_null[c];
3435         }
3436         outfile << endl;
3437       }
3438       outfile << "## se(Ve): " << endl;
3439       for (size_t i = 0; i < cPar.n_ph; i++) {
3440         for (size_t j = 0; j <= i; j++) {
3441           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3442               min(i, j);
3443           outfile << tab(j) << safe_sqrt(cPar.VVe_remle_null[c]);
3444         }
3445         outfile << endl;
3446       }
3447 
3448       outfile << "## MLE estimate for Vg in the null model: " << endl;
3449       for (size_t i = 0; i < cPar.n_ph; i++) {
3450         for (size_t j = 0; j < cPar.n_ph; j++) {
3451           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3452               min(i, j);
3453           outfile << tab(j) << cPar.Vg_mle_null[c];
3454         }
3455         outfile << endl;
3456       }
3457       outfile << "## se(Vg): " << endl;
3458       for (size_t i = 0; i < cPar.n_ph; i++) {
3459         for (size_t j = 0; j <= i; j++) {
3460           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3461               min(i, j);
3462           outfile << tab(j) << safe_sqrt(cPar.VVg_mle_null[c]);
3463         }
3464         outfile << endl;
3465       }
3466       outfile << "## MLE estimate for Ve in the null model: " << endl;
3467       for (size_t i = 0; i < cPar.n_ph; i++) {
3468         for (size_t j = 0; j < cPar.n_ph; j++) {
3469           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3470               min(i, j);
3471           outfile << tab(j) << cPar.Ve_mle_null[c];
3472         }
3473         outfile << endl;
3474       }
3475       outfile << "## se(Ve): " << endl;
3476       for (size_t i = 0; i < cPar.n_ph; i++) {
3477         for (size_t j = 0; j <= i; j++) {
3478           c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3479               min(i, j);
3480           outfile << tab(j) << safe_sqrt(cPar.VVe_mle_null[c]);
3481         }
3482         outfile << endl;
3483       }
3484       outfile << "## estimate for B (d by c) in the null model (columns "
3485                  "correspond to the covariates provided in the file): "
3486               << endl;
3487       for (size_t i = 0; i < cPar.n_ph; i++) {
3488         for (size_t j = 0; j < cPar.n_cvt; j++) {
3489           c = i * cPar.n_cvt + j;
3490           outfile << tab(j) << cPar.beta_remle_null[c];
3491         }
3492         outfile << endl;
3493       }
3494       outfile << "## se(B): " << endl;
3495       for (size_t i = 0; i < cPar.n_ph; i++) {
3496         for (size_t j = 0; j < cPar.n_cvt; j++) {
3497           c = i * cPar.n_cvt + j;
3498           outfile << tab(j) << cPar.se_beta_remle_null[c];
3499         }
3500         outfile << endl;
3501       }
3502     }
3503   }
3504 
3505   if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 ||
3506       cPar.a_mode == 14 || cPar.a_mode == 16) {
3507     outfile << "## estimated mean = " << cPar.pheno_mean << endl;
3508   }
3509 
3510   if (cPar.a_mode == 11 || cPar.a_mode == 13) {
3511     outfile << "##" << endl;
3512     outfile << "## MCMC related:" << endl;
3513     outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl;
3514     outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl;
3515     outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp)
3516             << endl;
3517     outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma
3518             << endl;
3519     outfile << "## random seed = " << cPar.randseed << endl;
3520     outfile << "## acceptance ratio = "
3521             << (double)cPar.n_accept /
3522                    (double)((cPar.w_step + cPar.s_step) * cPar.n_mh)
3523             << endl;
3524   }
3525 
3526   outfile << "##" << endl;
3527   outfile << "## Computation Time:" << endl;
3528   outfile << "## total computation time = " << cPar.time_total << " min "
3529           << endl;
3530   outfile << "## computation time break down: " << endl;
3531   if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2 || cPar.a_mode == 11 ||
3532       cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) {
3533     outfile << "##      time on calculating relatedness matrix = "
3534             << cPar.time_G << " min " << endl;
3535   }
3536   if (cPar.a_mode == M_EIGEN) {
3537     outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
3538             << " min " << endl;
3539   }
3540   if (cPar.a_mode == M_LMM1 || cPar.a_mode == M_LMM2 || cPar.a_mode == M_LMM3 ||
3541       cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == 11 ||
3542       cPar.a_mode == 12 || cPar.a_mode == 13 || cPar.a_mode == 14 ||
3543       cPar.a_mode == 16) {
3544     outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
3545             << " min " << endl;
3546     outfile << "##      time on calculating UtX = " << cPar.time_UtX << " min "
3547             << endl;
3548   }
3549   if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) ||
3550       (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
3551     outfile << "##      time on optimization = " << cPar.time_opt << " min "
3552             << endl;
3553   }
3554   if (cPar.a_mode == 11 || cPar.a_mode == 13) {
3555     outfile << "##      time on proposal = " << cPar.time_Proposal << " min "
3556             << endl;
3557     outfile << "##      time on mcmc = " << cPar.time_opt << " min " << endl;
3558     outfile << "##      time on Omega = " << cPar.time_Omega << " min " << endl;
3559   }
3560   if (cPar.a_mode == 41 || cPar.a_mode == 42) {
3561     outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
3562             << " min " << endl;
3563   }
3564   if (cPar.a_mode == 43) {
3565     outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
3566             << " min " << endl;
3567     outfile << "##      time on predicting phenotypes = " << cPar.time_opt
3568             << " min " << endl;
3569   }
3570   outfile << "##" << endl;
3571 
3572   outfile.close();
3573   outfile.clear();
3574 
3575   info_msg("Done");
3576   return;
3577 }
3578