1 /*
2 Genome-wide Efficient Mixed Model Association (GEMMA)
3 Copyright © 2011-2017, Xiang Zhou
4 Copyright © 2017, Peter Carbonetto
5 Copyright © 2017-2018, Pjotr Prins
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <cmath>
22 #include <cstring>
23 #include <ctime>
24 #include <fstream>
25 #include <iostream>
26 #include <string>
27 #include <sys/stat.h>
28 #ifdef OPENBLAS
29 extern "C" {
30 // these functions are defined in cblas.h - but if we include that we
31 // conflicts with other BLAS includes (GSL)
32 int openblas_get_num_threads(void);
33 int openblas_get_parallel(void);
34 char* openblas_get_config(void);
35 char* openblas_get_corename(void);
36 }
37 #else
38 #pragma message "Not compiling with OPENBLAS"
39 #endif
40
41 #include "gsl/gsl_blas.h"
42 #include "gsl/gsl_cdf.h"
43 #include "gsl/gsl_eigen.h"
44 #include "gsl/gsl_linalg.h"
45 #include "gsl/gsl_matrix.h"
46 #include "gsl/gsl_vector.h"
47 #include "gsl/gsl_version.h"
48
49 #include "bslmm.h"
50 #include "bslmmdap.h"
51 #include <csignal> // for gsl_error_handler
52 #include "gemma.h"
53 #include "gemma_io.h"
54 #include "lapack.h"
55 #include "ldr.h"
56 #include "lm.h"
57 #include "lmm.h"
58 #include "mathfunc.h"
59 #include "mvlmm.h"
60 #include "prdt.h"
61 #include "varcov.h"
62 #include "vc.h"
63 #include "debug.h"
64 #include "version.h"
65
66 using namespace std;
67
GEMMA(void)68 GEMMA::GEMMA(void) : version(GEMMA_VERSION), date(GEMMA_DATE), year(GEMMA_YEAR) {}
69
gemma_gsl_error_handler(const char * reason,const char * file,int line,int gsl_errno)70 void gemma_gsl_error_handler (const char * reason,
71 const char * file,
72 int line, int gsl_errno) {
73 cerr << "GSL ERROR: " << reason << " in " << file
74 << " at line " << line << " errno " << gsl_errno <<endl;
75 std::raise(SIGINT); // keep the stack trace for gdb
76 }
77
78 //#if defined(OPENBLAS) && !defined(OPENBLAS_LEGACY)
79 //#include <openblas_config.h>
80 //#endif
81
PrintHeader(void)82 void GEMMA::PrintHeader(void) {
83
84 cout <<
85 "GEMMA " << version << " (" << date << ") by Xiang Zhou and team (C) 2012-" << year << endl;
86 return;
87 }
88
PrintLicense(void)89 void GEMMA::PrintLicense(void) {
90 cout << endl;
91 cout << "The Software Is Distributed Under GNU General Public "
92 << "License, But May Also Require The Following Notifications." << endl;
93 cout << endl;
94
95 cout << "Including Lapack Routines In The Software May Require"
96 << " The Following Notification:" << endl;
97 cout << "Copyright (c) 1992-2010 The University of Tennessee and "
98 << "The University of Tennessee Research Foundation. All rights "
99 << "reserved." << endl;
100 cout << "Copyright (c) 2000-2010 The University of California "
101 << "Berkeley. All rights reserved." << endl;
102 cout << "Copyright (c) 2006-2010 The University of Colorado Denver. "
103 << "All rights reserved." << endl;
104 cout << endl;
105
106 cout << "$COPYRIGHT$" << endl;
107 cout << "Additional copyrights may follow" << endl;
108 cout << "$HEADER$" << endl;
109 cout << "Redistribution and use in source and binary forms, with or "
110 << "without modification, are permitted provided that the following "
111 << " conditions are met:" << endl;
112 cout << "- Redistributions of source code must retain the above "
113 << "copyright notice, this list of conditions and the following "
114 << "disclaimer." << endl;
115 cout << "- Redistributions in binary form must reproduce the above "
116 << "copyright notice, this list of conditions and the following "
117 << "disclaimer listed in this license in the documentation and/or "
118 << "other materials provided with the distribution." << endl;
119 cout << "- Neither the name of the copyright holders nor the names "
120 << "of its contributors may be used to endorse or promote products "
121 << "derived from this software without specific prior written "
122 << "permission." << endl;
123 cout << "The copyright holders provide no reassurances that the "
124 << "source code provided does not infringe any patent, copyright, "
125 << "or any other "
126 << "intellectual property rights of third parties. "
127 << "The copyright holders disclaim any liability to any recipient "
128 << "for claims brought against "
129 << "recipient by any third party for infringement of that parties "
130 << "intellectual property rights. " << endl;
131 cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "
132 << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "
133 << "INCLUDING, BUT NOT "
134 << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "
135 << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "
136 << "SHALL THE COPYRIGHT "
137 << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "
138 << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
139 << "(INCLUDING, BUT NOT "
140 << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "
141 << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "
142 << "HOWEVER CAUSED AND ON ANY "
143 << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "
144 << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "
145 << "OUT OF THE USE "
146 << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "
147 << "SUCH DAMAGE." << endl;
148 cout << endl;
149
150 return;
151 }
152
PrintHelp(size_t option)153 void GEMMA::PrintHelp(size_t option) {
154
155 if (option == 0) {
156 cout << endl;
157 cout << " type gemma -h [num] for detailed help" << endl;
158 cout << " options: " << endl;
159 cout << " 1: quick guide" << endl;
160 cout << " 2: file I/O related" << endl;
161 cout << " 3: SNP QC" << endl;
162 cout << " 4: calculate relatedness matrix" << endl;
163 cout << " 5: perform eigen decomposition" << endl;
164 cout << " 6: perform variance component estimation" << endl;
165 cout << " 7: fit a linear model" << endl;
166 cout << " 8: fit a linear mixed model" << endl;
167 cout << " 9: fit a multivariate linear mixed model" << endl;
168 cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
169 cout << " 11: obtain predicted values" << endl;
170 cout << " 12: calculate snp variance covariance" << endl;
171 cout << " 13: note" << endl;
172 cout << " 14: debug options" << endl;
173 cout << endl;
174 }
175
176 if (option == 1) {
177 cout << " QUICK GUIDE" << endl;
178 cout << " to generate a relatedness matrix: " << endl;
179 cout << " gemma -bfile [prefix] -gk [num] -o [prefix]" << endl;
180 cout << " gemma -g [filename] -p [filename] -gk [num] -o [prefix]"
181 << endl;
182 cout << " to generate the S matrix: " << endl;
183 cout << " gemma -bfile [prefix] -gs -o [prefix]" << endl;
184 cout << " gemma -p [filename] -g [filename] -gs -o [prefix]"
185 << endl;
186 cout << " gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"
187 << endl;
188 cout << " gemma -p [filename] -g [filename] -cat [filename] -gs "
189 "-o [prefix]"
190 << endl;
191 cout << " gemma -bfile [prefix] -sample [num] -gs -o [prefix]"
192 << endl;
193 cout << " gemma -p [filename] -g [filename] -sample [num] -gs -o "
194 "[prefix]"
195 << endl;
196 cout << " to generate the q vector: " << endl;
197 cout << " gemma -beta [filename] -gq -o [prefix]" << endl;
198 cout << " gemma -beta [filename] -cat [filename] -gq -o [prefix]"
199 << endl;
200 cout << " to generate the ldsc weigthts: " << endl;
201 cout << " gemma -beta [filename] -gw -o [prefix]" << endl;
202 cout << " gemma -beta [filename] -cat [filename] -gw -o [prefix]"
203 << endl;
204 cout << " to perform eigen decomposition of the relatedness matrix: "
205 << endl;
206 cout << " gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"
207 << endl;
208 cout << " gemma -g [filename] -p [filename] -k [filename] -eigen "
209 "-o [prefix]"
210 << endl;
211 cout << " to estimate variance components: " << endl;
212 cout << " gemma -bfile [prefix] -k [filename] -vc [num] -o "
213 "[prefix]"
214 << endl;
215 cout << " gemma -p [filename] -k [filename] -vc [num] -o [prefix]"
216 << endl;
217 cout << " gemma -bfile [prefix] -mk [filename] -vc [num] -o "
218 "[prefix]"
219 << endl;
220 cout
221 << " gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"
222 << endl;
223 cout << " gemma -beta [filename] -cor [filename] -vc [num] -o "
224 "[prefix]"
225 << endl;
226 cout << " gemma -beta [filename] -cor [filename] -cat [filename] "
227 "-vc [num] -o [prefix]"
228 << endl;
229 cout << " options for the above two commands: -crt -windowbp [num]"
230 << endl;
231 cout << " gemma -mq [filename] -ms [filename] -mv [filename] -vc "
232 "[num] -o [prefix]"
233 << endl;
234 cout << " or with summary statistics, replace bfile with mbfile, "
235 "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"
236 << endl;
237 cout << " gemma -beta [filename] -bfile [filename] -cat "
238 "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"
239 << endl;
240 cout << " gemma -beta [filename] -bfile [filename] -cat "
241 "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"
242 << endl;
243 cout << " to fit a linear mixed model: " << endl;
244 cout << " gemma -bfile [prefix] -k [filename] -lmm [num] -o "
245 "[prefix]"
246 << endl;
247 cout << " gemma -g [filename] -p [filename] -a [filename] -k "
248 "[filename] -lmm [num] -o [prefix]"
249 << endl;
250 cout << " to fit a linear mixed model to test g by e effects: " << endl;
251 cout << " gemma -bfile [prefix] -gxe [filename] -k [filename] "
252 "-lmm [num] -o [prefix]"
253 << endl;
254 cout << " gemma -g [filename] -p [filename] -a [filename] -gxe "
255 "[filename] -k [filename] -lmm [num] -o [prefix]"
256 << endl;
257 cout << " to fit a univariate linear mixed model with different residual "
258 "weights for different individuals: "
259 << endl;
260 cout << " gemma -bfile [prefix] -weight [filename] -k [filename] "
261 "-lmm [num] -o [prefix]"
262 << endl;
263 cout << " gemma -g [filename] -p [filename] -a [filename] "
264 "-weight [filename] -k [filename] -lmm [num] -o [prefix]"
265 << endl;
266 cout << " to fit a multivariate linear mixed model: " << endl;
267 cout << " gemma -bfile [prefix] -k [filename] -lmm [num] -n "
268 "[pheno cols...] -o [prefix]"
269 << endl;
270 cout << " gemma -g [filename] -p [filename] -a [filename] -k "
271 "[filename] -lmm [num] -n [pheno cols...] -o [prefix]"
272 << endl;
273 cout << " to fit a Bayesian sparse linear mixed model: " << endl;
274 cout << " gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
275 cout << " gemma -g [filename] -p [filename] -a [filename] -bslmm "
276 "[num] -o [prefix]"
277 << endl;
278 cout << " to obtain predicted values: " << endl;
279 cout << " gemma -bfile [prefix] -epm [filename] -emu [filename] "
280 "-ebv [filename] -k [filename] -predict [num] -o [prefix]"
281 << endl;
282 cout << " gemma -g [filename] -p [filename] -epm [filename] -emu "
283 "[filename] -ebv [filename] -k [filename] -predict [num] -o "
284 "[prefix]"
285 << endl;
286 cout << " to calculate correlations between SNPs: " << endl;
287 cout << " gemma -bfile [prefix] -calccor -o [prefix]" << endl;
288 cout << " gemma -g [filename] -p [filename] -calccor -o [prefix]"
289 << endl;
290 cout << endl;
291 }
292
293 if (option == 2) {
294 cout << " FILE I/O RELATED OPTIONS" << endl;
295 cout << " -bfile [prefix] "
296 << " specify input PLINK binary ped file prefix." << endl;
297 cout << " requires: *.fam, *.bim and *.bed files" << endl;
298 cout << " missing value: -9" << endl;
299 cout << " -g [filename] "
300 << " specify input BIMBAM mean genotype file name" << endl;
301 cout << " format: rs#1, allele0, allele1, genotype for individual "
302 "1, genotype for individual 2, ..."
303 << endl;
304 cout << " rs#2, allele0, allele1, genotype for individual "
305 "1, genotype for individual 2, ..."
306 << endl;
307 cout << " ..." << endl;
308 cout << " missing value: NA" << endl;
309 cout << " -p [filename] "
310 << " specify input BIMBAM-style phenotype file name (when used with PLINK .fam phenotypes are ignored)" << endl;
311 cout << " format: phenotype for individual 1" << endl;
312 cout << " phenotype for individual 2" << endl;
313 cout << " ..." << endl;
314 cout << " missing value: NA" << endl;
315 cout << " -a [filename] "
316 << " specify input BIMBAM SNP annotation file name (optional)" << endl;
317 cout << " format: rs#1, base_position, chr_number" << endl;
318 cout << " rs#2, base_position, chr_number" << endl;
319 cout << " ..." << endl;
320
321 cout << " -gxe [filename] "
322 << " specify input file that contains a column of environmental "
323 "factor for g by e tests"
324 << endl;
325 cout << " format: variable for individual 1" << endl;
326 cout << " variable for individual 2" << endl;
327 cout << " ..." << endl;
328 cout << " missing value: NA" << endl;
329 cout << " -widv [filename] "
330 << " weight file contains a column of positive values to be used "
331 << "as weights for residuals---each weight corresponds to an "
332 << "individual, in which a high weight corresponds to high "
333 << "residual error variance for this individual (similar in "
334 << "format to phenotype file)"
335 << endl;
336 cout << " format: variable for individual 1" << endl;
337 cout << " variable for individual 2" << endl;
338 cout << " ..." << endl;
339 cout << " missing value: NA" << endl;
340 cout << " -k [filename] "
341 << " specify input kinship/relatedness matrix file name" << endl;
342 cout << " -mk [filename] "
343 << " specify input file which contains a list of kinship/relatedness "
344 "matrices"
345 << endl;
346 cout << " -u [filename] "
347 << " specify input file containing the eigen vectors of the "
348 "kinship/relatedness matrix"
349 << endl;
350 cout << " -d [filename] "
351 << " specify input file containing the eigen values of the "
352 "kinship/relatedness matrix"
353 << endl;
354 cout << " -c [filename] "
355 << " specify input covariates file name (optional)" << endl;
356 cout << " -cat [filename] "
357 << " specify input category file name (optional), which contains rs "
358 "cat1 cat2 ..."
359 << endl;
360 cout << " -beta [filename] "
361 << " specify input beta file name (optional), which contains rs beta "
362 "se_beta n_total (or n_mis and n_obs) estimates from a lm model"
363 << endl;
364 cout << " -cor [filename] "
365 << " specify input correlation file name (optional), which contains "
366 "rs window_size correlations from snps"
367 << endl;
368 cout << " missing value: NA" << endl;
369 cout << " note: the intercept (a column of 1s) may need to be "
370 "included"
371 << endl;
372 cout << " -epm [filename] "
373 << " specify input estimated parameter file name" << endl;
374 cout << " -en [n1] [n2] [n3] [n4] "
375 << " specify values for the input estimated parameter file (with a "
376 "header)"
377 << endl;
378 cout << " options: n1: rs column number" << endl;
379 cout << " n2: estimated alpha column number (0 to ignore)"
380 << endl;
381 cout << " n3: estimated beta column number (0 to ignore)"
382 << endl;
383 cout << " n4: estimated gamma column number (0 to ignore)"
384 << endl;
385 cout << " default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if "
386 "-ebv is specified"
387 << endl;
388 cout << " -ebv [filename] "
389 << " specify input estimated random effect (breeding value) file name"
390 << endl;
391 cout << " format: value for individual 1" << endl;
392 cout << " value for individual 2" << endl;
393 cout << " ..." << endl;
394 cout << " missing value: NA" << endl;
395 cout << " -emu [filename] "
396 << " specify input log file name containing estimated mean" << endl;
397 cout << " -mu [num] "
398 << " specify input estimated mean value" << endl;
399 cout << " -gene [filename] "
400 << " specify input gene expression file name" << endl;
401 cout << " format: header" << endl;
402 cout << " gene1, count for individual 1, count for "
403 "individual 2, ..."
404 << endl;
405 cout << " gene2, count for individual 1, count for "
406 "individual 2, ..."
407 << endl;
408 cout << " ..." << endl;
409 cout << " missing value: not allowed" << endl;
410 cout << " -r [filename] "
411 << " specify input total read count file name" << endl;
412 cout << " format: total read count for individual 1" << endl;
413 cout << " total read count for individual 2" << endl;
414 cout << " ..." << endl;
415 cout << " missing value: NA" << endl;
416 cout
417 << " -snps [filename] "
418 << " specify input snps file name to only analyze a certain set of snps"
419 << endl;
420 cout << " format: rs#1" << endl;
421 cout << " rs#2" << endl;
422 cout << " ..." << endl;
423 cout << " missing value: NA" << endl;
424 cout << " -silence "
425 << " silent terminal display" << endl;
426 cout << " -km [num] "
427 << " specify input kinship/relatedness file type (default 1)." << endl;
428 cout << " options: 1: \"n by n matrix\" format" << endl;
429 cout << " 2: \"id id value\" format" << endl;
430 cout << " -n [num] "
431 << " specify phenotype column in the phenotype/*.fam file (optional; "
432 "default 1)"
433 << endl;
434 cout << " -pace [num] "
435 << " specify terminal display update pace (default 1,000 SNPs or "
436 "1,000 iterations)."
437 << endl;
438 cout << " -outdir [path] "
439 << " specify output directory path (default \"./output/\")" << endl;
440 cout << " -o [prefix] "
441 << " specify output file prefix (default \"result\")" << endl;
442 cout << " output: prefix.cXX.txt or prefix.sXX.txt from "
443 "kinship/relatedness matrix estimation"
444 << endl;
445 cout << " output: prefix.assoc.txt and prefix.log.txt form "
446 "association tests"
447 << endl;
448 cout << endl;
449 }
450
451 if (option == 3) {
452 cout << " SNP QC OPTIONS" << endl;
453 cout << " -miss [num] "
454 << " specify missingness threshold (default 0.05)" << endl;
455 cout << " -maf [num] "
456 << " specify minor allele frequency threshold (default 0.01)" << endl;
457 cout << " -hwe [num] "
458 << " specify HWE test p value threshold (default 0; no test)" << endl;
459 cout << " -r2 [num] "
460 << " specify r-squared threshold (default 0.9999)" << endl;
461 cout << " -notsnp "
462 << " minor allele frequency cutoff is not used" << endl;
463 cout << endl;
464 }
465
466 if (option == 4) {
467 cout << " RELATEDNESS MATRIX (K) CALCULATION OPTIONS" << endl;
468 cout << " -ksnps [filename] "
469 << " specify input snps file name to compute K" << endl;
470 cout << " -loco [chr] "
471 << " leave one chromosome out (LOCO) by name (requires -a annotation "
472 "file)"
473 << endl;
474 cout << " -a [filename] "
475 << " specify input BIMBAM SNP annotation file name (LOCO only)"
476 << endl;
477 cout << " -gk [num] "
478 << " specify which type of kinship/relatedness matrix to generate "
479 "(default 1)"
480 << endl;
481 cout << " options: 1: centered XX^T/p" << endl;
482 cout << " 2: standardized XX^T/p" << endl;
483 cout << " note: non-polymorphic SNPs are excluded " << endl;
484 cout << endl;
485 }
486
487 if (option == 5) {
488 cout << " EIGEN-DECOMPOSITION OPTIONS" << endl;
489 cout << " -eigen "
490 << " specify to perform eigen decomposition of the loaded relatedness "
491 "matrix"
492 << endl;
493 cout << endl;
494 }
495
496 if (option == 6) {
497 cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
498 cout << " -vc "
499 << " specify to perform variance component estimation for the loaded "
500 "relatedness matrix/matrices"
501 << endl;
502 cout
503 << " options (with kinship file): 1: HE regression (default)"
504 << endl;
505 cout << " 2: REML" << endl;
506 cout << " options (with beta/cor files): 1: Centered genotypes "
507 "(default)"
508 << endl;
509 cout << " 2: Standardized genotypes"
510 << endl;
511 cout << " -crt -windowbp [num]"
512 << " specify the window size based on bp (default 1000000; 1Mb)"
513 << endl;
514 cout << " -crt -windowcm [num]"
515 << " specify the window size based on cm (default 0)" << endl;
516 cout << " -crt -windowns [num]"
517 << " specify the window size based on number of snps (default 0)"
518 << endl;
519 cout << endl;
520 }
521
522 if (option == 7) {
523 cout << " LINEAR MODEL OPTIONS" << endl;
524 cout << " -lm [num] "
525 << " specify analysis options (default 1)." << endl;
526 cout << " options: 1: Wald test" << endl;
527 cout << " 2: Likelihood ratio test" << endl;
528 cout << " 3: Score test" << endl;
529 cout << " 4: 1-3" << endl;
530 cout << endl;
531 }
532
533 if (option == 8) {
534 cout << " LINEAR MIXED MODEL OPTIONS" << endl;
535 cout << " -lmm [num] "
536 << " specify analysis options (default 1)." << endl;
537 cout << " options: 1: Wald test" << endl;
538 cout << " 2: Likelihood ratio test" << endl;
539 cout << " 3: Score test" << endl;
540 cout << " 4: 1-3" << endl;
541 cout << " 5: Parameter estimation in the null model only"
542 << endl;
543 cout << " -lmin [num] "
544 << " specify minimal value for lambda (default 1e-5)" << endl;
545 cout << " -lmax [num] "
546 << " specify maximum value for lambda (default 1e+5)" << endl;
547 cout
548 << " -region [num] "
549 << " specify the number of regions used to evaluate lambda (default 10)"
550 << endl;
551 cout << " -loco [chr] "
552 << " leave one chromosome out (LOCO) by name (requires -a annotation "
553 "file)"
554 << endl;
555 cout << endl;
556 }
557
558 if (option == 9) {
559 cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
560 cout << " -n [pheno cols...] - range of phenotypes" << endl;
561 cout << " -pnr "
562 << " specify the pvalue threshold to use the Newton-Raphson's method "
563 "(default 0.001)"
564 << endl;
565 cout << " -emi "
566 << " specify the maximum number of iterations for the PX-EM method in "
567 "the null (default 10000)"
568 << endl;
569 cout << " -nri "
570 << " specify the maximum number of iterations for the "
571 "Newton-Raphson's method in the null (default 100)"
572 << endl;
573 cout << " -emp "
574 << " specify the precision for the PX-EM method in the null (default "
575 "0.0001)"
576 << endl;
577 cout << " -nrp "
578 << " specify the precision for the Newton-Raphson's method in the "
579 "null (default 0.0001)"
580 << endl;
581 cout << " -crt "
582 << " specify to output corrected pvalues for these pvalues that are "
583 "below the -pnr threshold"
584 << endl;
585 cout << endl;
586 }
587
588 if (option == 10) {
589 cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl;
590 cout << " -bslmm [num] "
591 << " specify analysis options (default 1)." << endl;
592 cout << " options: 1: BSLMM" << endl;
593 cout << " 2: standard ridge regression/GBLUP (no mcmc)"
594 << endl;
595 cout << " 3: probit BSLMM (requires 0/1 phenotypes)"
596 << endl;
597 cout
598 << " 4: BSLMM with DAP for Hyper Parameter Estimation"
599 << endl;
600 cout << " 5: BSLMM with DAP for Fine Mapping" << endl;
601
602 cout << " -ldr [num] "
603 << " specify analysis options (default 1)." << endl;
604 cout << " options: 1: LDR" << endl;
605
606 cout << " MCMC OPTIONS" << endl;
607 cout << " Prior" << endl;
608 cout << " -hmin [num] "
609 << " specify minimum value for h (default 0)" << endl;
610 cout << " -hmax [num] "
611 << " specify maximum value for h (default 1)" << endl;
612 cout << " -rmin [num] "
613 << " specify minimum value for rho (default 0)" << endl;
614 cout << " -rmax [num] "
615 << " specify maximum value for rho (default 1)" << endl;
616 cout << " -pmin [num] "
617 << " specify minimum value for log10(pi) (default log10(1/p), where p "
618 "is the number of analyzed SNPs )"
619 << endl;
620 cout << " -pmax [num] "
621 << " specify maximum value for log10(pi) (default log10(1) )" << endl;
622 cout << " -smin [num] "
623 << " specify minimum value for |gamma| (default 0)" << endl;
624 cout << " -smax [num] "
625 << " specify maximum value for |gamma| (default 300)" << endl;
626
627 cout << " Proposal" << endl;
628 cout << " -gmean [num] "
629 << " specify the mean for the geometric distribution (default: 2000)"
630 << endl;
631 cout << " -hscale [num] "
632 << " specify the step size scale for the proposal distribution of h "
633 "(value between 0 and 1, default min(10/sqrt(n),1) )"
634 << endl;
635 cout << " -rscale [num] "
636 << " specify the step size scale for the proposal distribution of rho "
637 "(value between 0 and 1, default min(10/sqrt(n),1) )"
638 << endl;
639 cout << " -pscale [num] "
640 << " specify the step size scale for the proposal distribution of "
641 "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )"
642 << endl;
643
644 cout << " Others" << endl;
645 cout << " -w [num] "
646 << " specify burn-in steps (default 100,000)" << endl;
647 cout << " -s [num] "
648 << " specify sampling steps (default 1,000,000)" << endl;
649 cout << " -rpace [num] "
650 << " specify recording pace, record one state in every [num] steps "
651 "(default 10)"
652 << endl;
653 cout << " -wpace [num] "
654 << " specify writing pace, write values down in every [num] recorded "
655 "steps (default 1000)"
656 << endl;
657 cout << " -seed [num] "
658 << " specify random seed (a random seed is generated by default)"
659 << endl;
660 cout << " -mh [num] "
661 << " specify number of MH steps in each iteration (default 10)"
662 << endl;
663 cout << " requires: 0/1 phenotypes and -bslmm 3 option" << endl;
664 cout << endl;
665 }
666
667 if (option == 11) {
668 cout << " PREDICTION OPTIONS" << endl;
669 cout << " -predict [num] "
670 << " specify prediction options (default 1)." << endl;
671 cout << " options: 1: predict for individuals with missing "
672 "phenotypes"
673 << endl;
674 cout << " 2: predict for individuals with missing "
675 "phenotypes, and convert the predicted values to probability "
676 "scale. Use only for files fitted with -bslmm 3 option"
677 << endl;
678 cout << endl;
679 }
680
681 if (option == 12) {
682 cout << " CALC CORRELATION OPTIONS" << endl;
683 cout << " -calccor " << endl;
684 cout << " -windowbp [num] "
685 << " specify the window size based on bp (default 1000000; 1Mb)"
686 << endl;
687 cout << " -windowcm [num] "
688 << " specify the window size based on cm (default 0; not used)"
689 << endl;
690 cout << " -windowns [num] "
691 << " specify the window size based on number of snps (default 0; not "
692 "used)"
693 << endl;
694 cout << endl;
695 }
696
697 if (option == 13) {
698 cout << " NOTE" << endl;
699 cout << " 1. Only individuals with non-missing phenotoypes and covariates "
700 "will be analyzed."
701 << endl;
702 cout << " 2. Missing genotoypes will be repalced with the mean genotype of "
703 "that SNP."
704 << endl;
705 cout << " 3. For lmm analysis, memory should be large enough to hold the "
706 "relatedness matrix and to perform eigen decomposition."
707 << endl;
708 cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp "
709 "will increase computation time dramatically."
710 << endl;
711 cout << " 5. For bslmm analysis, in addition to 3, memory should be large "
712 "enough to hold the whole genotype matrix."
713 << endl;
714 cout << endl;
715 }
716
717 if (option == 14) {
718 cout << " DEBUG OPTIONS" << endl;
719 cout << " -check enable checks (slower)" << endl;
720 cout << " -no-fpe-check disable hardware floating point checking" << endl;
721 cout << " -strict strict mode will stop when there is a problem" << endl;
722 cout << " -silence silent terminal display" << endl;
723 cout << " -debug debug output" << endl;
724 cout << " -debug-data debug data output" << endl;
725 cout << " -nind [num] read up to num individuals" << endl;
726 cout << " -issue [num] enable tests relevant to issue tracker" << endl;
727 cout << " -legacy run gemma in legacy mode" << endl;
728 cout << endl;
729 }
730
731 cout << "The GEMMA software is distributed under the GNU General Public v3" << endl;
732 cout << " -license show license information" << endl;
733 cout <<
734 " see also http://www.xzlab.org/software.html, https://github.com/genetics-statistics" << endl;
735 return;
736 }
737
738 // OPTIONS
739 // -------
740 // gk: 21-22
741 // gs: 25-26
742 // gq: 27-28
743 // eigen: 31-32
744 // lmm: 1-5
745 // bslmm: 11-15
746 // predict: 41-43
747 // lm: 51
748 // vc: 61
749 // ci: 66-67
750 // calccor: 71
751 // gw: 72
752
Assign(int argc,char ** argv,PARAM & cPar)753 void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
754 string str;
755
756 for (int i = 1; i < argc; i++) {
757 if (strcmp(argv[i], "-bfile") == 0 || strcmp(argv[i], "--bfile") == 0 ||
758 strcmp(argv[i], "-b") == 0) {
759 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
760 continue;
761 }
762 ++i;
763 str.clear();
764 str.assign(argv[i]);
765 cPar.file_bfile = str;
766 } else if (strcmp(argv[i], "-mbfile") == 0 ||
767 strcmp(argv[i], "--mbfile") == 0 ||
768 strcmp(argv[i], "-mb") == 0) {
769 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
770 continue;
771 }
772 ++i;
773 str.clear();
774 str.assign(argv[i]);
775 cPar.file_mbfile = str;
776 } else if (strcmp(argv[i], "-silence") == 0 || strcmp(argv[i], "--quiet") == 0) {
777 debug_set_quiet_mode(true);
778 } else if (strcmp(argv[i], "-g") == 0) {
779 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
780 continue;
781 }
782 ++i;
783 str.clear();
784 str.assign(argv[i]);
785 cPar.file_geno = str;
786 } else if (strcmp(argv[i], "-mg") == 0) {
787 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
788 continue;
789 }
790 ++i;
791 str.clear();
792 str.assign(argv[i]);
793 cPar.file_mgeno = str;
794 } else if (strcmp(argv[i], "-p") == 0) {
795 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
796 continue;
797 }
798 ++i;
799 str.clear();
800 str.assign(argv[i]);
801 cPar.file_pheno = str;
802 } else if (strcmp(argv[i], "-a") == 0) {
803 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
804 continue;
805 }
806 ++i;
807 str.clear();
808 str.assign(argv[i]);
809 cPar.file_anno = str;
810 } else if (strcmp(argv[i], "-gxe") == 0) {
811 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
812 continue;
813 }
814 ++i;
815 str.clear();
816 str.assign(argv[i]);
817 cPar.file_gxe = str;
818 } else if (strcmp(argv[i], "-widv") == 0) {
819 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
820 continue;
821 }
822 ++i;
823 str.clear();
824 str.assign(argv[i]);
825 cPar.file_weight = str;
826 } else if (strcmp(argv[i], "-wsnp") == 0) {
827 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
828 continue;
829 }
830 ++i;
831 str.clear();
832 str.assign(argv[i]);
833 cPar.file_wsnp = str;
834 } else if (strcmp(argv[i], "-wcat") == 0) {
835 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
836 continue;
837 }
838 ++i;
839 str.clear();
840 str.assign(argv[i]);
841 cPar.file_wcat = str;
842 } else if (strcmp(argv[i], "-k") == 0) {
843 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
844 continue;
845 }
846 ++i;
847 str.clear();
848 str.assign(argv[i]);
849 cPar.file_kin = str;
850 } else if (strcmp(argv[i], "-mk") == 0) {
851 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
852 continue;
853 }
854 ++i;
855 str.clear();
856 str.assign(argv[i]);
857 cPar.file_mk = str;
858 } else if (strcmp(argv[i], "-u") == 0) {
859 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
860 continue;
861 }
862 ++i;
863 str.clear();
864 str.assign(argv[i]);
865 cPar.file_ku = str;
866 } else if (strcmp(argv[i], "-d") == 0) {
867 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
868 continue;
869 }
870 ++i;
871 str.clear();
872 str.assign(argv[i]);
873 cPar.file_kd = str;
874 } else if (strcmp(argv[i], "-c") == 0) {
875 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
876 continue;
877 }
878 ++i;
879 str.clear();
880 str.assign(argv[i]);
881 cPar.file_cvt = str;
882 } else if (strcmp(argv[i], "-cat") == 0) {
883 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
884 continue;
885 }
886 ++i;
887 str.clear();
888 str.assign(argv[i]);
889 cPar.file_cat = str;
890 } else if (strcmp(argv[i], "-mcat") == 0) {
891 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
892 continue;
893 }
894 ++i;
895 str.clear();
896 str.assign(argv[i]);
897 cPar.file_mcat = str;
898 } else if (strcmp(argv[i], "-catc") == 0) {
899 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
900 continue;
901 }
902 ++i;
903 str.clear();
904 str.assign(argv[i]);
905 cPar.file_catc = str;
906 } else if (strcmp(argv[i], "-mcatc") == 0) {
907 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
908 continue;
909 }
910 ++i;
911 str.clear();
912 str.assign(argv[i]);
913 cPar.file_mcatc = str;
914 } else if (strcmp(argv[i], "-beta") == 0) {
915 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
916 continue;
917 }
918 ++i;
919 str.clear();
920 str.assign(argv[i]);
921 cPar.file_beta = str;
922 } else if (strcmp(argv[i], "-bf") == 0) {
923 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
924 continue;
925 }
926 ++i;
927 str.clear();
928 str.assign(argv[i]);
929 cPar.file_bf = str;
930 } else if (strcmp(argv[i], "-hyp") == 0) {
931 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
932 continue;
933 }
934 ++i;
935 str.clear();
936 str.assign(argv[i]);
937 cPar.file_hyp = str;
938 } else if (strcmp(argv[i], "-cor") == 0) {
939 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
940 continue;
941 }
942 ++i;
943 str.clear();
944 str.assign(argv[i]);
945 cPar.file_cor = str;
946 } else if (strcmp(argv[i], "-study") == 0) {
947 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
948 continue;
949 }
950 ++i;
951 str.clear();
952 str.assign(argv[i]);
953 cPar.file_study = str;
954 } else if (strcmp(argv[i], "-ref") == 0) {
955 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
956 continue;
957 }
958 ++i;
959 str.clear();
960 str.assign(argv[i]);
961 cPar.file_ref = str;
962 } else if (strcmp(argv[i], "-mstudy") == 0) {
963 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
964 continue;
965 }
966 ++i;
967 str.clear();
968 str.assign(argv[i]);
969 cPar.file_mstudy = str;
970 } else if (strcmp(argv[i], "-mref") == 0) {
971 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
972 continue;
973 }
974 ++i;
975 str.clear();
976 str.assign(argv[i]);
977 cPar.file_mref = str;
978 } else if (strcmp(argv[i], "-epm") == 0) {
979 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
980 continue;
981 }
982 ++i;
983 str.clear();
984 str.assign(argv[i]);
985 cPar.file_epm = str;
986 } else if (strcmp(argv[i], "-en") == 0) {
987 while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
988 ++i;
989 str.clear();
990 str.assign(argv[i]);
991 cPar.est_column.push_back(atoi(str.c_str()));
992 }
993 } else if (strcmp(argv[i], "-ebv") == 0) {
994 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
995 continue;
996 }
997 ++i;
998 str.clear();
999 str.assign(argv[i]);
1000 cPar.file_ebv = str;
1001 } else if (strcmp(argv[i], "-emu") == 0) {
1002 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1003 continue;
1004 }
1005 ++i;
1006 str.clear();
1007 str.assign(argv[i]);
1008 cPar.file_log = str;
1009 } else if (strcmp(argv[i], "-mu") == 0) {
1010 if (argv[i + 1] == NULL) {
1011 continue;
1012 }
1013 ++i;
1014 str.clear();
1015 str.assign(argv[i]);
1016 cPar.pheno_mean = atof(str.c_str());
1017 } else if (strcmp(argv[i], "-gene") == 0) {
1018 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1019 continue;
1020 }
1021 ++i;
1022 str.clear();
1023 str.assign(argv[i]);
1024 cPar.file_gene = str;
1025 } else if (strcmp(argv[i], "-r") == 0) {
1026 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1027 continue;
1028 }
1029 ++i;
1030 str.clear();
1031 str.assign(argv[i]);
1032 cPar.file_read = str;
1033 } else if (strcmp(argv[i], "-snps") == 0) {
1034 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1035 continue;
1036 }
1037 ++i;
1038 str.clear();
1039 str.assign(argv[i]);
1040 cPar.file_snps = str;
1041 } else if (strcmp(argv[i], "-km") == 0) {
1042 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1043 continue;
1044 }
1045 ++i;
1046 str.clear();
1047 str.assign(argv[i]);
1048 cPar.k_mode = atoi(str.c_str());
1049 } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (list/range)
1050 (cPar.p_column).clear();
1051 while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
1052 ++i;
1053 str.clear();
1054 str.assign(argv[i]);
1055 (cPar.p_column).push_back(atoi(str.c_str()));
1056 }
1057 } else if (strcmp(argv[i], "-pace") == 0) {
1058 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1059 continue;
1060 }
1061 ++i;
1062 str.clear();
1063 str.assign(argv[i]);
1064 cPar.d_pace = atoi(str.c_str());
1065 } else if (strcmp(argv[i], "-outdir") == 0) {
1066 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1067 continue;
1068 }
1069 ++i;
1070 str.clear();
1071 str.assign(argv[i]);
1072 cPar.path_out = str;
1073 } else if (strcmp(argv[i], "-o") == 0) {
1074 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1075 continue;
1076 }
1077 ++i;
1078 str.clear();
1079 str.assign(argv[i]);
1080 cPar.file_out = str;
1081 } else if (strcmp(argv[i], "-miss") == 0) {
1082 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1083 continue;
1084 }
1085 ++i;
1086 str.clear();
1087 str.assign(argv[i]);
1088 cPar.miss_level = atof(str.c_str());
1089 } else if (strcmp(argv[i], "-maf") == 0) {
1090 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1091 continue;
1092 }
1093 ++i;
1094 str.clear();
1095 str.assign(argv[i]);
1096 if (cPar.maf_level != -1) {
1097 cPar.maf_level = atof(str.c_str());
1098 }
1099 } else if (strcmp(argv[i], "-hwe") == 0) {
1100 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1101 continue;
1102 }
1103 ++i;
1104 str.clear();
1105 str.assign(argv[i]);
1106 cPar.hwe_level = atof(str.c_str());
1107 } else if (strcmp(argv[i], "-r2") == 0) {
1108 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1109 continue;
1110 }
1111 ++i;
1112 str.clear();
1113 str.assign(argv[i]);
1114 cPar.r2_level = atof(str.c_str());
1115 } else if (strcmp(argv[i], "-notsnp") == 0) {
1116 cPar.maf_level = -1;
1117 } else if (strcmp(argv[i], "-loco") == 0) {
1118 assert(argv[i + 1]);
1119 ++i;
1120 str.clear();
1121 str.assign(argv[i]);
1122 cPar.loco = str;
1123 } else if (strcmp(argv[i], "-gk") == 0) {
1124 if (cPar.a_mode != 0) {
1125 cPar.error = true;
1126 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1127 "-predict -calccor options is allowed."
1128 << endl;
1129 break;
1130 }
1131 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1132 cPar.a_mode = M_KIN; // default
1133 continue;
1134 }
1135 ++i;
1136 str.clear();
1137 str.assign(argv[i]);
1138 cPar.a_mode = 20 + atoi(str.c_str());
1139 } else if (strcmp(argv[i], "-gs") == 0) {
1140 if (cPar.a_mode != 0) {
1141 cPar.error = true;
1142 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1143 "-predict -calccor options is allowed."
1144 << endl;
1145 break;
1146 }
1147 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1148 cPar.a_mode = 25;
1149 continue;
1150 }
1151 ++i;
1152 str.clear();
1153 str.assign(argv[i]);
1154 cPar.a_mode = 24 + atoi(str.c_str());
1155 } else if (strcmp(argv[i], "-gq") == 0) {
1156 if (cPar.a_mode != 0) {
1157 cPar.error = true;
1158 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1159 "-predict -calccor options is allowed."
1160 << endl;
1161 break;
1162 }
1163 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1164 cPar.a_mode = 27;
1165 continue;
1166 }
1167 ++i;
1168 str.clear();
1169 str.assign(argv[i]);
1170 cPar.a_mode = 26 + atoi(str.c_str());
1171 } else if (strcmp(argv[i], "-gw") == 0) {
1172 if (cPar.a_mode != 0) {
1173 cPar.error = true;
1174 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1175 "-predict -calccor options is allowed."
1176 << endl;
1177 break;
1178 }
1179 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1180 cPar.a_mode = 72;
1181 continue;
1182 }
1183 ++i;
1184 str.clear();
1185 str.assign(argv[i]);
1186 cPar.a_mode = 71 + atoi(str.c_str());
1187 } else if (strcmp(argv[i], "-sample") == 0) {
1188 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1189 continue;
1190 }
1191 ++i;
1192 str.clear();
1193 str.assign(argv[i]);
1194 cPar.ni_subsample = atoi(str.c_str());
1195 } else if (strcmp(argv[i], "-eigen") == 0) {
1196 if (cPar.a_mode != 0) {
1197 cPar.error = true;
1198 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1199 "-predict -calccor options is allowed."
1200 << endl;
1201 break;
1202 }
1203 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1204 cPar.a_mode = 31;
1205 continue;
1206 }
1207 ++i;
1208 str.clear();
1209 str.assign(argv[i]);
1210 cPar.a_mode = 30 + atoi(str.c_str());
1211 } else if (strcmp(argv[i], "-calccor") == 0) {
1212 if (cPar.a_mode != 0) {
1213 cPar.error = true;
1214 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1215 "-predict -calccor options is allowed."
1216 << endl;
1217 break;
1218 }
1219 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1220 cPar.a_mode = 71;
1221 continue;
1222 }
1223 ++i;
1224 str.clear();
1225 str.assign(argv[i]);
1226 cPar.a_mode = 70 + atoi(str.c_str());
1227 } else if (strcmp(argv[i], "-vc") == 0) {
1228 if (cPar.a_mode != 0) {
1229 cPar.error = true;
1230 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1231 "-predict -calccor options is allowed."
1232 << endl;
1233 break;
1234 }
1235 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1236 cPar.a_mode = 61;
1237 continue;
1238 }
1239 ++i;
1240 str.clear();
1241 str.assign(argv[i]);
1242 cPar.a_mode = 60 + atoi(str.c_str());
1243 } else if (strcmp(argv[i], "-ci") == 0) {
1244 if (cPar.a_mode != 0) {
1245 cPar.error = true;
1246 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1247 "-predict -calccor options is allowed."
1248 << endl;
1249 break;
1250 }
1251 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1252 cPar.a_mode = 66;
1253 continue;
1254 }
1255 ++i;
1256 str.clear();
1257 str.assign(argv[i]);
1258 cPar.a_mode = 65 + atoi(str.c_str());
1259 } else if (strcmp(argv[i], "-pve") == 0) {
1260 double s = 0;
1261 while (argv[i + 1] != NULL &&
1262 (argv[i + 1][0] != '-' || !isalpha(argv[i + 1][1]))) {
1263 ++i;
1264 str.clear();
1265 str.assign(argv[i]);
1266 cPar.v_pve.push_back(atof(str.c_str()));
1267 s += atof(str.c_str());
1268 }
1269 if (s == 1) {
1270 cout << "summation of pve equals one." << endl;
1271 }
1272 } else if (strcmp(argv[i], "-blocks") == 0) {
1273 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1274 continue;
1275 }
1276 ++i;
1277 str.clear();
1278 str.assign(argv[i]);
1279 cPar.n_block = atoi(str.c_str());
1280 } else if (strcmp(argv[i], "-noconstrain") == 0) {
1281 cPar.noconstrain = true;
1282 } else if (strcmp(argv[i], "-lm") == 0) {
1283 if (cPar.a_mode != 0) {
1284 cPar.error = true;
1285 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1286 "-predict -calccor options is allowed."
1287 << endl;
1288 break;
1289 }
1290 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1291 cPar.a_mode = 51;
1292 continue;
1293 }
1294 ++i;
1295 str.clear();
1296 str.assign(argv[i]);
1297 cPar.a_mode = 50 + atoi(str.c_str());
1298 } else if (strcmp(argv[i], "-fa") == 0 || strcmp(argv[i], "-lmm") == 0) {
1299 if (cPar.a_mode != 0) {
1300 cPar.error = true;
1301 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1302 "-predict -calccor options is allowed."
1303 << endl;
1304 break;
1305 }
1306 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1307 cPar.a_mode = 1;
1308 continue;
1309 }
1310 ++i;
1311 str.clear();
1312 str.assign(argv[i]);
1313 cPar.a_mode = atoi(str.c_str());
1314 } else if (strcmp(argv[i], "-lmin") == 0) {
1315 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1316 continue;
1317 }
1318 ++i;
1319 str.clear();
1320 str.assign(argv[i]);
1321 cPar.l_min = atof(str.c_str());
1322 } else if (strcmp(argv[i], "-lmax") == 0) {
1323 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1324 continue;
1325 }
1326 ++i;
1327 str.clear();
1328 str.assign(argv[i]);
1329 cPar.l_max = atof(str.c_str());
1330 } else if (strcmp(argv[i], "-region") == 0) {
1331 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1332 continue;
1333 }
1334 ++i;
1335 str.clear();
1336 str.assign(argv[i]);
1337 cPar.n_region = atoi(str.c_str());
1338 } else if (strcmp(argv[i], "-pnr") == 0) {
1339 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1340 continue;
1341 }
1342 ++i;
1343 str.clear();
1344 str.assign(argv[i]);
1345 cPar.p_nr = atof(str.c_str());
1346 } else if (strcmp(argv[i], "-emi") == 0) {
1347 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1348 continue;
1349 }
1350 ++i;
1351 str.clear();
1352 str.assign(argv[i]);
1353 cPar.em_iter = atoi(str.c_str());
1354 } else if (strcmp(argv[i], "-nri") == 0) {
1355 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1356 continue;
1357 }
1358 ++i;
1359 str.clear();
1360 str.assign(argv[i]);
1361 cPar.nr_iter = atoi(str.c_str());
1362 } else if (strcmp(argv[i], "-nind") == 0) {
1363 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1364 continue;
1365 }
1366 ++i;
1367 str.clear();
1368 str.assign(argv[i]);
1369 cPar.ni_max = atoi(str.c_str()); // for testing purposes
1370 enforce(cPar.ni_max > 0);
1371 } else if (strcmp(argv[i], "-issue") == 0) {
1372 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1373 continue;
1374 }
1375 ++i;
1376 str.clear();
1377 str.assign(argv[i]);
1378 auto issue = atoi(str.c_str()); // for testing purposes
1379 enforce(issue > 0);
1380 debug_set_issue(issue);
1381 } else if (strcmp(argv[i], "-emp") == 0) {
1382 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1383 continue;
1384 }
1385 ++i;
1386 str.clear();
1387 str.assign(argv[i]);
1388 cPar.em_prec = atof(str.c_str());
1389 } else if (strcmp(argv[i], "-nrp") == 0) {
1390 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1391 continue;
1392 }
1393 ++i;
1394 str.clear();
1395 str.assign(argv[i]);
1396 cPar.nr_prec = atof(str.c_str());
1397 } else if (strcmp(argv[i], "-crt") == 0) {
1398 cPar.crt = 1;
1399 } else if (strcmp(argv[i], "-bslmm") == 0) {
1400 if (cPar.a_mode != 0) {
1401 cPar.error = true;
1402 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1403 "-predict -calccor options is allowed."
1404 << endl;
1405 break;
1406 }
1407 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1408 cPar.a_mode = 11;
1409 continue;
1410 }
1411 ++i;
1412 str.clear();
1413 str.assign(argv[i]);
1414 cPar.a_mode = 10 + atoi(str.c_str());
1415 } else if (strcmp(argv[i], "-hmin") == 0) {
1416 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1417 continue;
1418 }
1419 ++i;
1420 str.clear();
1421 str.assign(argv[i]);
1422 cPar.h_min = atof(str.c_str());
1423 } else if (strcmp(argv[i], "-hmax") == 0) {
1424 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1425 continue;
1426 }
1427 ++i;
1428 str.clear();
1429 str.assign(argv[i]);
1430 cPar.h_max = atof(str.c_str());
1431 } else if (strcmp(argv[i], "-rmin") == 0) {
1432 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1433 continue;
1434 }
1435 ++i;
1436 str.clear();
1437 str.assign(argv[i]);
1438 cPar.rho_min = atof(str.c_str());
1439 } else if (strcmp(argv[i], "-rmax") == 0) {
1440 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1441 continue;
1442 }
1443 ++i;
1444 str.clear();
1445 str.assign(argv[i]);
1446 cPar.rho_max = atof(str.c_str());
1447 } else if (strcmp(argv[i], "-pmin") == 0) {
1448 if (argv[i + 1] == NULL) {
1449 continue;
1450 }
1451 ++i;
1452 str.clear();
1453 str.assign(argv[i]);
1454 cPar.logp_min = atof(str.c_str()) * log(10.0);
1455 } else if (strcmp(argv[i], "-pmax") == 0) {
1456 if (argv[i + 1] == NULL) {
1457 continue;
1458 }
1459 ++i;
1460 str.clear();
1461 str.assign(argv[i]);
1462 cPar.logp_max = atof(str.c_str()) * log(10.0);
1463 } else if (strcmp(argv[i], "-smin") == 0) {
1464 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1465 continue;
1466 }
1467 ++i;
1468 str.clear();
1469 str.assign(argv[i]);
1470 cPar.s_min = atoi(str.c_str());
1471 } else if (strcmp(argv[i], "-smax") == 0) {
1472 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1473 continue;
1474 }
1475 ++i;
1476 str.clear();
1477 str.assign(argv[i]);
1478 cPar.s_max = atoi(str.c_str());
1479 } else if (strcmp(argv[i], "-gmean") == 0) {
1480 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1481 continue;
1482 }
1483 ++i;
1484 str.clear();
1485 str.assign(argv[i]);
1486 cPar.geo_mean = atof(str.c_str());
1487 } else if (strcmp(argv[i], "-hscale") == 0) {
1488 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1489 continue;
1490 }
1491 ++i;
1492 str.clear();
1493 str.assign(argv[i]);
1494 cPar.h_scale = atof(str.c_str());
1495 } else if (strcmp(argv[i], "-rscale") == 0) {
1496 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1497 continue;
1498 }
1499 ++i;
1500 str.clear();
1501 str.assign(argv[i]);
1502 cPar.rho_scale = atof(str.c_str());
1503 } else if (strcmp(argv[i], "-pscale") == 0) {
1504 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1505 continue;
1506 }
1507 ++i;
1508 str.clear();
1509 str.assign(argv[i]);
1510 cPar.logp_scale = atof(str.c_str()) * log(10.0);
1511 } else if (strcmp(argv[i], "-w") == 0) {
1512 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1513 continue;
1514 }
1515 ++i;
1516 str.clear();
1517 str.assign(argv[i]);
1518 cPar.w_step = atoi(str.c_str());
1519 } else if (strcmp(argv[i], "-s") == 0) {
1520 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1521 continue;
1522 }
1523 ++i;
1524 str.clear();
1525 str.assign(argv[i]);
1526 cPar.s_step = atoi(str.c_str());
1527 } else if (strcmp(argv[i], "-rpace") == 0) {
1528 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1529 continue;
1530 }
1531 ++i;
1532 str.clear();
1533 str.assign(argv[i]);
1534 cPar.r_pace = atoi(str.c_str());
1535 } else if (strcmp(argv[i], "-wpace") == 0) {
1536 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1537 continue;
1538 }
1539 ++i;
1540 str.clear();
1541 str.assign(argv[i]);
1542 cPar.w_pace = atoi(str.c_str());
1543 } else if (strcmp(argv[i], "-seed") == 0) {
1544 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1545 continue;
1546 }
1547 ++i;
1548 str.clear();
1549 str.assign(argv[i]);
1550 cPar.randseed = atol(str.c_str());
1551 } else if (strcmp(argv[i], "-mh") == 0) {
1552 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1553 continue;
1554 }
1555 ++i;
1556 str.clear();
1557 str.assign(argv[i]);
1558 cPar.n_mh = atoi(str.c_str());
1559 } else if (strcmp(argv[i], "-predict") == 0) {
1560 if (cPar.a_mode != 0) {
1561 cPar.error = true;
1562 cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
1563 "-predict -calccor options is allowed."
1564 << endl;
1565 break;
1566 }
1567 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1568 cPar.a_mode = 41;
1569 continue;
1570 }
1571 ++i;
1572 str.clear();
1573 str.assign(argv[i]);
1574 cPar.a_mode = 40 + atoi(str.c_str());
1575 } else if (strcmp(argv[i], "-windowcm") == 0) {
1576 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1577 continue;
1578 }
1579 ++i;
1580 str.clear();
1581 str.assign(argv[i]);
1582 cPar.window_cm = atof(str.c_str());
1583 } else if (strcmp(argv[i], "-windowbp") == 0) {
1584 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1585 continue;
1586 }
1587 ++i;
1588 str.clear();
1589 str.assign(argv[i]);
1590 cPar.window_bp = atoi(str.c_str());
1591 } else if (strcmp(argv[i], "-windowns") == 0) {
1592 if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
1593 continue;
1594 }
1595 ++i;
1596 str.clear();
1597 str.assign(argv[i]);
1598 cPar.window_ns = atoi(str.c_str());
1599 } else if (strcmp(argv[i], "-debug-data") == 0) {
1600 // cPar.mode_debug = true;
1601 debug_set_debug_data_mode(true);
1602 debug_set_debug_mode(true);
1603 } else if (strcmp(argv[i], "-debug") == 0) {
1604 // cPar.mode_debug = true;
1605 debug_set_debug_mode(true);
1606 } else if (strcmp(argv[i], "-check") == 0) {
1607 // cPar.mode_check = false;
1608 debug_set_check_mode(true);
1609 } else if (strcmp(argv[i], "-no-check") == 0) {
1610 // cPar.mode_check = false;
1611 debug_set_no_check_mode(true);
1612 } else if (strcmp(argv[i], "-no-fpe-check") == 0) {
1613 // cPar.mode_check = false;
1614 debug_set_no_fpe_check_mode(true);
1615 } else if (strcmp(argv[i], "-strict") == 0) {
1616 // cPar.mode_strict = true;
1617 debug_set_strict_mode(true);
1618 debug_set_debug_mode(true);
1619 } else if (strcmp(argv[i], "-legacy") == 0) {
1620 debug_set_legacy_mode(true);
1621 warning_msg("you are running in legacy mode - support may drop in future versions of gemma");
1622 } else {
1623 cout << "error! unrecognized option: " << argv[i] << endl;
1624 cPar.error = true;
1625 continue;
1626 }
1627 }
1628
1629 // Change prediction mode to 43 if the epm file is not provided.
1630 if (cPar.a_mode == 41 && cPar.file_epm.empty()) {
1631 cPar.a_mode = 43;
1632 }
1633
1634 return;
1635 }
1636
BatchRun(PARAM & cPar)1637 void GEMMA::BatchRun(PARAM &cPar) {
1638 clock_t time_begin, time_start;
1639 time_begin = clock();
1640
1641 if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
1642
1643 // Read Files.
1644 cout << "Reading Files ... " << endl;
1645 cPar.ReadFiles();
1646 if (cPar.error == true) {
1647 cout << "error! fail to read files. " << endl;
1648 return;
1649 }
1650 cPar.CheckData();
1651 if (cPar.error == true) {
1652 cout << "error! fail to check data. " << endl;
1653 return;
1654 }
1655
1656 // Prediction for bslmm
1657 if (cPar.a_mode == 41 || cPar.a_mode == 42) {
1658 gsl_vector *y_prdt;
1659
1660 y_prdt = gsl_vector_safe_alloc(cPar.ni_total - cPar.ni_test);
1661
1662 // set to zero
1663 gsl_vector_set_zero(y_prdt);
1664
1665 PRDT cPRDT;
1666 cPRDT.CopyFromParam(cPar);
1667
1668 // add breeding value if needed
1669 if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
1670 cout << "Adding Breeding Values ... " << endl;
1671
1672 gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
1673 gsl_vector *u_hat = gsl_vector_safe_alloc(cPar.ni_test);
1674
1675 // read kinship matrix and set u_hat
1676 vector<int> indicator_all;
1677 size_t c_bv = 0;
1678 for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
1679 indicator_all.push_back(1);
1680 if (cPar.indicator_bv[i] == 1) {
1681 gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
1682 c_bv++;
1683 }
1684 }
1685
1686 ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
1687 cPar.error, G);
1688 if (cPar.error == true) {
1689 cout << "error! fail to read kinship/relatedness file. " << endl;
1690 return;
1691 }
1692
1693 // read u
1694 cPRDT.AddBV(G, u_hat, y_prdt);
1695
1696 gsl_matrix_safe_free(G);
1697 gsl_vector_safe_free(u_hat);
1698 }
1699
1700 // add beta
1701 if (!cPar.file_bfile.empty()) {
1702 cPRDT.AnalyzePlink(y_prdt);
1703 } else {
1704 cPRDT.AnalyzeBimbam(y_prdt);
1705 }
1706
1707 // add mu
1708 gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
1709
1710 // convert y to probability if needed
1711 if (cPar.a_mode == 42) {
1712 double d;
1713 for (size_t i = 0; i < y_prdt->size; i++) {
1714 d = gsl_vector_get(y_prdt, i);
1715 d = gsl_cdf_gaussian_P(d, 1.0);
1716 gsl_vector_set(y_prdt, i, d);
1717 }
1718 }
1719
1720 cPRDT.CopyToParam(cPar);
1721
1722 cPRDT.WriteFiles(y_prdt);
1723
1724 gsl_vector_safe_free(y_prdt);
1725 }
1726
1727 // Prediction with kinship matrix only; for one or more phenotypes
1728 if (cPar.a_mode == 43) {
1729 // first, use individuals with full phenotypes to obtain estimates of Vg and
1730 // Ve
1731 gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
1732 gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
1733 gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
1734 gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
1735 gsl_matrix *UtW = gsl_matrix_safe_alloc(Y->size1, W->size2);
1736 gsl_matrix *UtY = gsl_matrix_safe_alloc(Y->size1, Y->size2);
1737 gsl_vector *eval = gsl_vector_safe_alloc(Y->size1);
1738
1739 gsl_matrix *Y_full = gsl_matrix_safe_alloc(cPar.ni_cvt, cPar.n_ph);
1740 gsl_matrix *W_full = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_cvt);
1741
1742 // set covariates matrix W and phenotype matrix Y
1743 // an intercept should be included in W,
1744 cPar.CopyCvtPhen(W, Y, 0);
1745 cPar.CopyCvtPhen(W_full, Y_full, 1);
1746
1747 gsl_matrix *Y_hat = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_ph);
1748 gsl_matrix *G_full = gsl_matrix_safe_alloc(Y_full->size1, Y_full->size1);
1749 gsl_matrix *H_full = gsl_matrix_safe_alloc(Y_full->size1 * Y_hat->size2,
1750 Y_full->size1 * Y_hat->size2);
1751
1752 // read relatedness matrix G, and matrix G_full
1753 ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
1754 cPar.error, G);
1755 if (cPar.error == true) {
1756 cout << "error! fail to read kinship/relatedness file. " << endl;
1757 return;
1758 }
1759 // This is not so elegant. Reads twice to select on idv and then cvt
1760 ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
1761 cPar.error, G_full);
1762 if (cPar.error == true) {
1763 cout << "error! fail to read kinship/relatedness file. " << endl;
1764 return;
1765 }
1766
1767 // center matrix G
1768 CenterMatrix(G);
1769 CenterMatrix(G_full);
1770 validate_K(G);
1771
1772 // eigen-decomposition and calculate trace_G
1773 cout << "Start Eigen-Decomposition..." << endl;
1774 time_start = clock();
1775 cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
1776 // write(eval,"eval zeroed");
1777 cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1778
1779 // calculate UtW and Uty
1780 CalcUtX(U, W, UtW);
1781 CalcUtX(U, Y, UtY);
1782
1783 // calculate variance component and beta estimates
1784 // and then obtain predicted values
1785 if (cPar.n_ph == 1) {
1786 gsl_vector *beta = gsl_vector_safe_alloc(W->size2);
1787 gsl_vector *se_beta = gsl_vector_safe_alloc(W->size2);
1788
1789 double lambda, logl, vg, ve;
1790 gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
1791
1792 // obtain estimates
1793 CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
1794 cPar.n_region, lambda, logl);
1795 CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
1796 se_beta);
1797
1798 cout << "REMLE estimate for vg in the null model = " << vg << endl;
1799 cout << "REMLE estimate for ve in the null model = " << ve << endl;
1800 cPar.vg_remle_null = vg;
1801 cPar.ve_remle_null = ve;
1802
1803 // obtain Y_hat from fixed effects
1804 gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
1805 gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
1806
1807 // obtain H
1808 gsl_matrix_set_identity(H_full);
1809 gsl_matrix_scale(H_full, ve);
1810 gsl_matrix_scale(G_full, vg);
1811 gsl_matrix_add(H_full, G_full);
1812
1813 // free matrices
1814 gsl_vector_safe_free(beta);
1815 gsl_vector_safe_free(se_beta);
1816 } else {
1817 gsl_matrix *Vg = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
1818 gsl_matrix *Ve = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
1819 gsl_matrix *B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
1820 gsl_matrix *se_B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
1821
1822 // obtain estimates
1823 CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
1824 cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
1825 cPar.n_region, Vg, Ve, B, se_B);
1826
1827 cout << "REMLE estimate for Vg in the null model: " << endl;
1828 for (size_t i = 0; i < Vg->size1; i++) {
1829 for (size_t j = 0; j <= i; j++) {
1830 cout << tab(j) << gsl_matrix_get(Vg, i, j);
1831 }
1832 cout << endl;
1833 }
1834 cout << "REMLE estimate for Ve in the null model: " << endl;
1835 for (size_t i = 0; i < Ve->size1; i++) {
1836 for (size_t j = 0; j <= i; j++) {
1837 cout << tab(j) << gsl_matrix_get(Ve, i, j);
1838 }
1839 cout << endl;
1840 }
1841 cPar.Vg_remle_null.clear();
1842 cPar.Ve_remle_null.clear();
1843 for (size_t i = 0; i < Vg->size1; i++) {
1844 for (size_t j = i; j < Vg->size2; j++) {
1845 cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
1846 cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
1847 }
1848 }
1849
1850 // obtain Y_hat from fixed effects
1851 gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
1852
1853 // obtain H
1854 KroneckerSym(G_full, Vg, H_full);
1855 for (size_t i = 0; i < G_full->size1; i++) {
1856 gsl_matrix_view H_sub = gsl_matrix_submatrix(
1857 H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
1858 gsl_matrix_add(&H_sub.matrix, Ve);
1859 }
1860
1861 // free matrices
1862 gsl_matrix_safe_free(Vg);
1863 gsl_matrix_safe_free(Ve);
1864 gsl_matrix_safe_free(B);
1865 gsl_matrix_safe_free(se_B);
1866 }
1867
1868 PRDT cPRDT;
1869
1870 cPRDT.CopyFromParam(cPar);
1871
1872 cout << "Predicting Missing Phentypes ... " << endl;
1873 time_start = clock();
1874 cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
1875 cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1876
1877 cPRDT.WriteFiles(Y_full);
1878
1879 gsl_matrix_safe_free(Y);
1880 gsl_matrix_safe_free(W);
1881 gsl_matrix_safe_free(G);
1882 gsl_matrix_safe_free(U);
1883 gsl_matrix_safe_free(UtW);
1884 gsl_matrix_safe_free(UtY);
1885 gsl_vector_safe_free(eval);
1886
1887 gsl_matrix_safe_free(Y_full);
1888 gsl_matrix_safe_free(Y_hat);
1889 gsl_matrix_safe_free(W_full);
1890 gsl_matrix_safe_free(G_full);
1891 gsl_matrix_safe_free(H_full);
1892 }
1893
1894 // Generate Kinship matrix (optionally using LOCO)
1895 if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
1896 cout << "Calculating Relatedness Matrix ... " << endl;
1897
1898 gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
1899 enforce_msg(G, "allocate G"); // just to be sure
1900
1901 time_start = clock();
1902
1903 cPar.CalcKin(G);
1904
1905 cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1906 if (cPar.error == true) {
1907 cout << "error! fail to calculate relatedness matrix. " << endl;
1908 return;
1909 }
1910
1911 // Now we have the Kinship matrix test it
1912 validate_K(G);
1913
1914 if (cPar.a_mode == M_KIN) {
1915 cPar.WriteMatrix(G, "cXX");
1916 } else { // M_KIN2
1917 cPar.WriteMatrix(G, "sXX");
1918 }
1919
1920 gsl_matrix_safe_free(G);
1921 }
1922
1923 // Compute the LDSC weights (not implemented yet)
1924 if (cPar.a_mode == 72) {
1925 cout << "Calculating Weights ... " << endl;
1926
1927 VARCOV cVarcov;
1928 cVarcov.CopyFromParam(cPar);
1929
1930 if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
1931
1932 if (!cPar.file_bfile.empty()) {
1933 cVarcov.AnalyzePlink();
1934 } else {
1935 cVarcov.AnalyzeBimbam();
1936 }
1937
1938 cVarcov.CopyToParam(cPar);
1939 }
1940
1941 // Compute the S matrix (and its variance), that is used for
1942 // variance component estimation using summary statistics.
1943 if (cPar.a_mode == 25 || cPar.a_mode == 26) {
1944 cout << "Calculating the S Matrix ... " << endl;
1945
1946 gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
1947 gsl_vector *ns = gsl_vector_safe_alloc(cPar.n_vc + 1);
1948 gsl_matrix_set_zero(S);
1949 gsl_vector_set_zero(ns);
1950
1951 gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
1952 gsl_matrix_view Svar_mat =
1953 gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
1954 gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
1955
1956 gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
1957 gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
1958 gsl_matrix_set_zero(K);
1959 gsl_matrix_set_zero(A);
1960
1961 gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
1962 gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
1963
1964 cPar.CopyCvtPhen(W, y, 0);
1965
1966 set<string> setSnps_beta;
1967 map<string, double> mapRS2wA, mapRS2wK;
1968
1969 cPar.ObtainWeight(setSnps_beta, mapRS2wK);
1970
1971 time_start = clock();
1972 cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
1973 &ns_vec.vector);
1974 cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
1975 if (cPar.error == true) {
1976 cout << "error! fail to calculate the S matrix. " << endl;
1977 return;
1978 }
1979
1980 gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
1981
1982 cPar.WriteMatrix(S, "S");
1983 cPar.WriteVector(ns, "size");
1984 cPar.WriteVar("snps");
1985
1986 gsl_matrix_safe_free(S);
1987 gsl_vector_safe_free(ns);
1988
1989 gsl_matrix_safe_free(A);
1990 gsl_matrix_safe_free(K);
1991
1992 gsl_vector_safe_free(y);
1993 gsl_matrix_safe_free(K);
1994 }
1995
1996 // Compute the q vector, that is used for variance component estimation using
1997 // summary statistics
1998 if (cPar.a_mode == 27 || cPar.a_mode == 28) {
1999 gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2000 gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2001 gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2002 gsl_vector_set_zero(q);
2003 gsl_vector_set_zero(s);
2004
2005 gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
2006
2007 vector<size_t> vec_cat, vec_ni;
2008 vector<double> vec_weight, vec_z2;
2009 map<string, double> mapRS2weight;
2010 mapRS2weight.clear();
2011
2012 time_start = clock();
2013 ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
2014 vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
2015 cPar.ns_test);
2016 cout << "## number of total individuals = " << cPar.ni_total << endl;
2017 cout << "## number of total SNPs/var = " << cPar.ns_total << endl;
2018 cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
2019 cout << "## number of variance components = " << cPar.n_vc << endl;
2020 cout << "Calculating the q vector ... " << endl;
2021 Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2022 &s_vec.vector);
2023 cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2024
2025 if (cPar.error == true) {
2026 cout << "error! fail to calculate the q vector. " << endl;
2027 return;
2028 }
2029
2030 gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
2031
2032 cPar.WriteMatrix(Vq, "Vq");
2033 cPar.WriteVector(q, "q");
2034 cPar.WriteVector(s, "size");
2035 gsl_matrix_safe_free(Vq);
2036 gsl_vector_safe_free(q);
2037 gsl_vector_safe_free(s);
2038 }
2039
2040 // Calculate SNP covariance.
2041 if (cPar.a_mode == 71) {
2042 VARCOV cVarcov;
2043 cVarcov.CopyFromParam(cPar);
2044
2045 if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
2046
2047 if (!cPar.file_bfile.empty()) {
2048 cVarcov.AnalyzePlink();
2049 } else {
2050 cVarcov.AnalyzeBimbam();
2051 }
2052
2053 cVarcov.CopyToParam(cPar);
2054 }
2055
2056 // LM.
2057 if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
2058 cPar.a_mode == 54) { // Fit LM
2059 gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2060 gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2061
2062 // set covariates matrix W and phenotype matrix Y
2063 // an intercept should be included in W,
2064 cPar.CopyCvtPhen(W, Y, 0);
2065
2066 // Fit LM or mvLM
2067 if (cPar.n_ph == 1) {
2068 LM cLm;
2069 cLm.CopyFromParam(cPar);
2070
2071 gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2072
2073 // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
2074
2075 if (!cPar.file_gene.empty()) {
2076 cLm.AnalyzeGene(W,
2077 &Y_col.vector); // y is the predictor, not the phenotype
2078 } else if (!cPar.file_bfile.empty()) {
2079 cLm.AnalyzePlink(W, &Y_col.vector);
2080 } else {
2081 cLm.AnalyzeBimbam(W, &Y_col.vector);
2082 }
2083
2084 cLm.WriteFiles();
2085 cLm.CopyToParam(cPar);
2086 }
2087 // release all matrices and vectors
2088 gsl_matrix_safe_free(Y);
2089 gsl_matrix_safe_free(W);
2090 }
2091
2092 // VC estimation with one or multiple kinship matrices
2093 // REML approach only
2094 // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
2095 // in param.cpp
2096 // for one phenotype only;
2097 if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
2098 if (!cPar.file_beta.empty()) {
2099 // need to obtain a common set of SNPs between beta file and the genotype
2100 // file; these are saved in mapRS2wA and mapRS2wK
2101 // normalize the weight in mapRS2wK to have an average of one; each
2102 // element of mapRS2wA is 1
2103 // update indicator_snps, so that the numbers are in accordance with
2104 // mapRS2wK
2105 set<string> setSnps_beta;
2106 ReadFile_snps_header(cPar.file_beta, setSnps_beta);
2107
2108 map<string, double> mapRS2wA, mapRS2wK;
2109 cPar.ObtainWeight(setSnps_beta, mapRS2wK);
2110
2111 cPar.UpdateSNP(mapRS2wK);
2112
2113 // Setup matrices and vectors.
2114 gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
2115 gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2116 gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2117 gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2118
2119 gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
2120 gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
2121
2122 gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2123 gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
2124
2125 gsl_matrix_set_zero(K);
2126 gsl_matrix_set_zero(A);
2127
2128 gsl_matrix_set_zero(S);
2129 gsl_matrix_set_zero(Vq);
2130 gsl_vector_set_zero(q);
2131 gsl_vector_set_zero(s);
2132
2133 cPar.CopyCvtPhen(W, y, 0);
2134
2135 gsl_matrix_view S_mat =
2136 gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
2137 gsl_matrix_view Svar_mat =
2138 gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
2139 gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
2140
2141 vector<size_t> vec_cat, vec_ni;
2142 vector<double> vec_weight, vec_z2;
2143
2144 // read beta, based on the mapRS2wK
2145 ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
2146 vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
2147 cPar.ns_test);
2148
2149 cout << "Study Panel: " << endl;
2150 cout << "## number of total individuals = " << cPar.ni_study << endl;
2151 cout << "## number of total SNPs/var = " << cPar.ns_study << endl;
2152 cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
2153 cout << "## number of variance components = " << cPar.n_vc << endl;
2154
2155 // compute q
2156 Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2157 &s_vec.vector);
2158
2159 // compute S
2160 time_start = clock();
2161 cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
2162 &s_vec.vector);
2163 cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2164 if (cPar.error == true) {
2165 cout << "error! fail to calculate the S matrix. " << endl;
2166 return;
2167 }
2168
2169 // compute vc estimates
2170 CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
2171 cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
2172 cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
2173 cPar.v_enrich, cPar.v_se_enrich);
2174
2175 assert(!has_nan(cPar.v_se_pve));
2176
2177 // if LDSC weights, then compute the weights and run the above steps again
2178 if (cPar.a_mode == 62) {
2179 // compute the weights and normalize the weights for A
2180 cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
2181
2182 // read beta file again, and update weigths vector
2183 ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
2184 vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
2185 cPar.ns_test);
2186
2187 // compute q
2188 Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
2189 &s_vec.vector);
2190
2191 // compute S
2192 time_start = clock();
2193 cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
2194 &s_vec.vector);
2195 cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2196 if (cPar.error == true) {
2197 cout << "error! fail to calculate the S matrix. " << endl;
2198 return;
2199 }
2200
2201 // compute vc estimates
2202 CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
2203 cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
2204 cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
2205 cPar.v_enrich, cPar.v_se_enrich);
2206 assert(!has_nan(cPar.v_se_pve));
2207 }
2208
2209
2210 gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
2211
2212 cPar.WriteMatrix(S, "S");
2213 cPar.WriteMatrix(Vq, "Vq");
2214 cPar.WriteVector(q, "q");
2215 cPar.WriteVector(s, "size");
2216
2217 gsl_matrix_safe_free(S);
2218 gsl_matrix_safe_free(Vq);
2219 gsl_vector_safe_free(q);
2220 gsl_vector_safe_free(s);
2221
2222 gsl_matrix_safe_free(A);
2223 gsl_matrix_safe_free(K);
2224 gsl_vector_safe_free(y);
2225 gsl_matrix_safe_free(W);
2226 } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
2227 if (!cPar.file_study.empty()) {
2228 string sfile = cPar.file_study + ".size.txt";
2229 CountFileLines(sfile, cPar.n_vc);
2230 } else {
2231 string file_name;
2232 igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
2233 if (!infile) {
2234 cout << "error! fail to open mstudy file: " << cPar.file_study
2235 << endl;
2236 return;
2237 }
2238
2239 safeGetline(infile, file_name);
2240
2241 infile.clear();
2242 infile.close();
2243
2244 string sfile = file_name + ".size.txt";
2245 CountFileLines(sfile, cPar.n_vc);
2246 }
2247
2248 cPar.n_vc = cPar.n_vc - 1;
2249
2250 gsl_matrix *S = gsl_matrix_safe_alloc(2 * cPar.n_vc, cPar.n_vc);
2251 gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2252 // gsl_matrix *V=gsl_matrix_safe_alloc (cPar.n_vc+1,
2253 // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
2254 // gsl_matrix *Vslope=gsl_matrix_safe_alloc (n_lines+1,
2255 // (n_lines*(n_lines+1))/2*(n_lines+1) );
2256 gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
2257 gsl_vector *s_study = gsl_vector_safe_alloc(cPar.n_vc);
2258 gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
2259 gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
2260
2261 gsl_matrix_set_zero(S);
2262 gsl_matrix_view S_mat =
2263 gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
2264 gsl_matrix_view Svar_mat =
2265 gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
2266
2267 gsl_matrix_set_zero(Vq);
2268 // gsl_matrix_set_zero(V);
2269 // gsl_matrix_set_zero(Vslope);
2270 gsl_vector_set_zero(q);
2271 gsl_vector_set_zero(s_study);
2272 gsl_vector_set_zero(s_ref);
2273
2274 if (!cPar.file_study.empty()) {
2275 ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
2276 } else {
2277 ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
2278 }
2279
2280 if (!cPar.file_ref.empty()) {
2281 ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
2282 cPar.ni_ref);
2283 } else {
2284 ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
2285 cPar.ni_ref);
2286 }
2287
2288 cout << "## number of variance components = " << cPar.n_vc << endl;
2289 cout << "## number of individuals in the sample = " << cPar.ni_study
2290 << endl;
2291 cout << "## number of individuals in the reference = " << cPar.ni_ref
2292 << endl;
2293
2294 CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
2295 cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
2296 cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
2297 cPar.v_se_enrich);
2298 assert(!has_nan(cPar.v_se_pve));
2299
2300 gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
2301 gsl_vector_safe_memcpy(&s_sub.vector, s_ref);
2302 gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
2303
2304 cPar.WriteMatrix(S, "S");
2305 cPar.WriteMatrix(Vq, "Vq");
2306 cPar.WriteVector(q, "q");
2307 cPar.WriteVector(s, "size");
2308
2309 gsl_matrix_safe_free(S);
2310 gsl_matrix_safe_free(Vq);
2311 // gsl_matrix_safe_free (V);
2312 // gsl_matrix_safe_free (Vslope);
2313 gsl_vector_safe_free(q);
2314 gsl_vector_safe_free(s_study);
2315 gsl_vector_safe_free(s_ref);
2316 gsl_vector_safe_free(s);
2317 } else {
2318 gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2319 gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2320 gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1 * cPar.n_vc);
2321
2322 // set covariates matrix W and phenotype matrix Y
2323 // an intercept should be included in W,
2324 cPar.CopyCvtPhen(W, Y, 0);
2325
2326 // read kinship matrices
2327 if (!(cPar.file_mk).empty()) {
2328 ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
2329 cPar.k_mode, cPar.error, G);
2330 if (cPar.error == true) {
2331 cout << "error! fail to read kinship/relatedness file. " << endl;
2332 return;
2333 }
2334
2335 // center matrix G, and obtain v_traceG
2336 double d = 0;
2337 (cPar.v_traceG).clear();
2338 for (size_t i = 0; i < cPar.n_vc; i++) {
2339 gsl_matrix_view G_sub =
2340 gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
2341 CenterMatrix(&G_sub.matrix);
2342 d = 0;
2343 for (size_t j = 0; j < G->size1; j++) {
2344 d += gsl_matrix_get(&G_sub.matrix, j, j);
2345 }
2346 d /= (double)G->size1;
2347 (cPar.v_traceG).push_back(d);
2348 }
2349 } else if (!(cPar.file_kin).empty()) {
2350 ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2351 cPar.k_mode, cPar.error, G);
2352 if (cPar.error == true) {
2353 cout << "error! fail to read kinship/relatedness file. " << endl;
2354 return;
2355 }
2356
2357 // center matrix G
2358 CenterMatrix(G);
2359 validate_K(G);
2360
2361 (cPar.v_traceG).clear();
2362 double d = 0;
2363 for (size_t j = 0; j < G->size1; j++) {
2364 d += gsl_matrix_get(G, j, j);
2365 }
2366 d /= (double)G->size1;
2367 (cPar.v_traceG).push_back(d);
2368 }
2369 // fit multiple variance components
2370 if (cPar.n_ph == 1) {
2371 // if (cPar.n_vc==1) {
2372 // } else {
2373 gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2374 VC cVc;
2375 cVc.CopyFromParam(cPar);
2376 if (cPar.a_mode == 61) {
2377 cVc.CalcVChe(G, W, &Y_col.vector);
2378 } else if (cPar.a_mode == 62) {
2379 cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
2380 } else {
2381 cVc.CalcVCacl(G, W, &Y_col.vector);
2382 }
2383 cVc.CopyToParam(cPar);
2384 // obtain pve from sigma2
2385 // obtain se_pve from se_sigma2
2386
2387 //}
2388 }
2389 }
2390 }
2391
2392 // compute confidence intervals with additional summary statistics
2393 // we do not check the sign of z-scores here, but they have to be matched with
2394 // the genotypes
2395 if (cPar.a_mode == 66 || cPar.a_mode == 67) {
2396 // read reference file first
2397 gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2398 gsl_matrix *Svar = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
2399 gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
2400
2401 gsl_matrix_set_zero(S);
2402 gsl_matrix_set_zero(Svar);
2403 gsl_vector_set_zero(s_ref);
2404
2405 if (!cPar.file_ref.empty()) {
2406 ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
2407 } else {
2408 ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
2409 }
2410
2411 // need to obtain a common set of SNPs between beta file and the genotype
2412 // file; these are saved in mapRS2wA and mapRS2wK
2413 // normalize the weight in mapRS2wK to have an average of one; each element
2414 // of mapRS2wA is 1
2415 set<string> setSnps_beta;
2416 ReadFile_snps_header(cPar.file_beta, setSnps_beta);
2417
2418 // obtain the weights for wA, which contains the SNP weights for SNPs used
2419 // in the model
2420 map<string, double> mapRS2wK;
2421 cPar.ObtainWeight(setSnps_beta, mapRS2wK);
2422
2423 // set up matrices and vector
2424 gsl_matrix *Xz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
2425 gsl_matrix *XWz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
2426 gsl_matrix *XtXWz =
2427 gsl_matrix_safe_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
2428 gsl_vector *w = gsl_vector_safe_alloc(mapRS2wK.size());
2429 gsl_vector *w1 = gsl_vector_safe_alloc(mapRS2wK.size());
2430 gsl_vector *z = gsl_vector_safe_alloc(mapRS2wK.size());
2431 gsl_vector *s_vec = gsl_vector_safe_alloc(cPar.n_vc);
2432
2433 vector<size_t> vec_cat, vec_size;
2434 vector<double> vec_z;
2435
2436 map<string, double> mapRS2z, mapRS2wA;
2437 map<string, string> mapRS2A1;
2438 string file_str;
2439
2440 // update s_vec, the number of snps in each category
2441 for (size_t i = 0; i < cPar.n_vc; i++) {
2442 vec_size.push_back(0);
2443 }
2444
2445 for (map<string, double>::const_iterator it = mapRS2wK.begin();
2446 it != mapRS2wK.end(); ++it) {
2447 vec_size[cPar.mapRS2cat[it->first]]++;
2448 }
2449
2450 for (size_t i = 0; i < cPar.n_vc; i++) {
2451 gsl_vector_set(s_vec, i, vec_size[i]);
2452 }
2453
2454 // update mapRS2wA using v_pve and s_vec
2455 if (cPar.a_mode == 66) {
2456 for (map<string, double>::const_iterator it = mapRS2wK.begin();
2457 it != mapRS2wK.end(); ++it) {
2458 mapRS2wA[it->first] = 1;
2459 }
2460 } else {
2461 cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
2462 }
2463
2464 // read in z-scores based on allele 0, and save that into a vector
2465 ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
2466
2467 // update snp indicator, save weights to w, save z-scores to vec_z, save
2468 // category label to vec_cat
2469 // sign of z is determined by matching alleles
2470 cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
2471
2472 // compute an n by k matrix of X_iWz
2473 cout << "Calculating Xz ... " << endl;
2474
2475 gsl_matrix_set_zero(Xz);
2476 gsl_vector_set_all(w1, 1);
2477
2478 if (!cPar.file_bfile.empty()) {
2479 file_str = cPar.file_bfile + ".bed";
2480 PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2481 vec_cat, w1, z, 0, Xz);
2482 } else if (!cPar.file_geno.empty()) {
2483 BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2484 cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
2485 } else if (!cPar.file_mbfile.empty()) {
2486 MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2487 cPar.mindicator_snp, vec_cat, w1, z, Xz);
2488 } else if (!cPar.file_mgeno.empty()) {
2489 MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2490 cPar.mindicator_snp, vec_cat, w1, z, Xz);
2491 }
2492 if (cPar.a_mode == 66) {
2493 gsl_matrix_safe_memcpy(XWz, Xz);
2494 } else if (cPar.a_mode == 67) {
2495 cout << "Calculating XWz ... " << endl;
2496
2497 gsl_matrix_set_zero(XWz);
2498
2499 if (!cPar.file_bfile.empty()) {
2500 file_str = cPar.file_bfile + ".bed";
2501 PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2502 vec_cat, w, z, 0, XWz);
2503 } else if (!cPar.file_geno.empty()) {
2504 BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2505 cPar.indicator_snp, vec_cat, w, z, 0, XWz);
2506 } else if (!cPar.file_mbfile.empty()) {
2507 MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2508 cPar.mindicator_snp, vec_cat, w, z, XWz);
2509 } else if (!cPar.file_mgeno.empty()) {
2510 MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2511 cPar.mindicator_snp, vec_cat, w, z, XWz);
2512 }
2513 }
2514 // compute an p by k matrix of X_j^TWX_iWz
2515 cout << "Calculating XtXWz ... " << endl;
2516 gsl_matrix_set_zero(XtXWz);
2517
2518 if (!cPar.file_bfile.empty()) {
2519 file_str = cPar.file_bfile + ".bed";
2520 PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
2521 XWz, 0, XtXWz);
2522 } else if (!cPar.file_geno.empty()) {
2523 BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
2524 cPar.indicator_snp, XWz, 0, XtXWz);
2525 } else if (!cPar.file_mbfile.empty()) {
2526 MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
2527 cPar.mindicator_snp, XWz, XtXWz);
2528 } else if (!cPar.file_mgeno.empty()) {
2529 MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
2530 cPar.mindicator_snp, XWz, XtXWz);
2531 }
2532 // compute confidence intervals
2533 CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
2534 cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
2535 cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
2536 assert(!has_nan(cPar.v_se_pve));
2537
2538 gsl_matrix_safe_free(S);
2539 gsl_matrix_safe_free(Svar);
2540 gsl_vector_safe_free(s_ref);
2541
2542 gsl_matrix_safe_free(Xz);
2543 gsl_matrix_safe_free(XWz);
2544 gsl_matrix_safe_free(XtXWz);
2545 gsl_vector_safe_free(w);
2546 gsl_vector_safe_free(w1);
2547 gsl_vector_safe_free(z);
2548 gsl_vector_safe_free(s_vec);
2549 }
2550
2551 // LMM or mvLMM or Eigen-Decomposition
2552 if (cPar.a_mode == M_LMM1 || cPar.a_mode == M_LMM2 || cPar.a_mode == M_LMM3 ||
2553 cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 ||
2554 cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen
2555 gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
2556 enforce_msg(Y, "allocate Y"); // just to be sure
2557 gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
2558 gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
2559 // matrix
2560 gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
2561 gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
2562 gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
2563 gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
2564 gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
2565 gsl_vector *eval = gsl_vector_calloc(Y->size1);
2566 gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
2567 gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
2568 assert_issue(is_issue(26), UtY->data[0] == 0.0);
2569
2570 // set covariates matrix W and phenotype matrix Y
2571 // an intercept should be included in W,
2572 cPar.CopyCvtPhen(W, Y, 0);
2573 if (!cPar.file_gxe.empty()) {
2574 cPar.CopyGxe(env);
2575 }
2576
2577 // read relatedness matrix G
2578 if (!(cPar.file_kin).empty()) {
2579 ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2580 cPar.k_mode, cPar.error, G);
2581 if (cPar.error == true) {
2582 cout << "error! fail to read kinship/relatedness file. " << endl;
2583 return;
2584 }
2585
2586 // center matrix G
2587 CenterMatrix(G);
2588 validate_K(G);
2589
2590 // is residual weights are provided, then
2591 if (!cPar.file_weight.empty()) {
2592 cPar.CopyWeight(weight);
2593 double d, wi, wj;
2594 for (size_t i = 0; i < G->size1; i++) {
2595 wi = gsl_vector_get(weight, i);
2596 for (size_t j = i; j < G->size2; j++) {
2597 wj = gsl_vector_get(weight, j);
2598 d = gsl_matrix_get(G, i, j);
2599 if (wi <= 0 || wj <= 0) {
2600 d = 0;
2601 } else {
2602 d /= safe_sqrt(wi * wj);
2603 }
2604 gsl_matrix_set(G, i, j, d);
2605 if (j != i) {
2606 gsl_matrix_set(G, j, i, d);
2607 }
2608 }
2609 }
2610 }
2611
2612 // eigen-decomposition and calculate trace_G
2613 cout << "Start Eigen-Decomposition..." << endl;
2614 time_start = clock();
2615
2616 if (cPar.a_mode == M_EIGEN) {
2617 cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 1);
2618 } else {
2619 cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
2620 }
2621 // write(eval,"eval");
2622
2623 if (!cPar.file_weight.empty()) {
2624 double wi;
2625 for (size_t i = 0; i < U->size1; i++) {
2626 wi = gsl_vector_get(weight, i);
2627 if (wi <= 0) {
2628 wi = 0;
2629 } else {
2630 wi = safe_sqrt(wi);
2631 }
2632 gsl_vector_view Urow = gsl_matrix_row(U, i);
2633 gsl_vector_scale(&Urow.vector, wi);
2634 }
2635 }
2636
2637 cPar.time_eigen =
2638 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2639 } else {
2640 ReadFile_eigenU(cPar.file_ku, cPar.error, U);
2641 if (cPar.error == true) {
2642 cout << "error! fail to read the U file. " << endl;
2643 return;
2644 }
2645
2646 ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
2647 if (cPar.error == true) {
2648 cout << "error! fail to read the D file. " << endl;
2649 return;
2650 }
2651
2652 cPar.trace_G = 0.0;
2653 for (size_t i = 0; i < eval->size; i++) {
2654 if (gsl_vector_get(eval, i) < 1e-10) {
2655 gsl_vector_set(eval, i, 0);
2656 }
2657 cPar.trace_G += gsl_vector_get(eval, i);
2658 }
2659 cPar.trace_G /= (double)eval->size;
2660 }
2661 // write(eval,"eval2");
2662
2663 if (cPar.a_mode == M_EIGEN) {
2664 cPar.WriteMatrix(U, "eigenU");
2665 cPar.WriteVector(eval, "eigenD");
2666 } else if (!cPar.file_gene.empty()) {
2667 // calculate UtW and Uty
2668 CalcUtX(U, W, UtW);
2669 CalcUtX(U, Y, UtY);
2670
2671 assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2672
2673 LMM cLmm;
2674 cLmm.CopyFromParam(cPar);
2675
2676 gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2677 gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2678
2679 cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
2680 &Y_col.vector); // y is the predictor, not the phenotype
2681
2682 cLmm.WriteFiles();
2683 cLmm.CopyToParam(cPar);
2684 } else {
2685 // calculate UtW and Uty
2686 CalcUtX(U, W, UtW);
2687 CalcUtX(U, Y, UtY);
2688 assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2689
2690 // calculate REMLE/MLE estimate and pve for univariate model
2691 if (cPar.n_ph == 1) { // one phenotype
2692 gsl_vector_view beta = gsl_matrix_row(B, 0);
2693 gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
2694 gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2695
2696 assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
2697
2698 CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
2699 cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
2700 assert(!isnan(UtY->data[0]));
2701
2702 CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
2703 cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
2704 &se_beta.vector);
2705
2706 assert(!isnan(UtY->data[0]));
2707
2708 cPar.beta_mle_null.clear();
2709 cPar.se_beta_mle_null.clear();
2710 assert(!isnan(B->data[0]));
2711 assert(!isnan(se_B->data[0]));
2712 for (size_t i = 0; i < B->size2; i++) {
2713 cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
2714 cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
2715 }
2716 assert(!isnan(UtY->data[0]));
2717 assert(!isnan(cPar.beta_mle_null.front()));
2718 assert(!isnan(cPar.se_beta_mle_null.front()));
2719
2720 // the following functions do not modify eval
2721 CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
2722 cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
2723 CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
2724 cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
2725 &se_beta.vector);
2726
2727 cPar.beta_remle_null.clear();
2728 cPar.se_beta_remle_null.clear();
2729 assert(!isnan(B->data[0]));
2730 assert(!isnan(se_B->data[0]));
2731
2732 for (size_t i = 0; i < B->size2; i++) {
2733 cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
2734 cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
2735 }
2736
2737 CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
2738 cPar.pve_null, cPar.pve_se_null);
2739 cPar.PrintSummary();
2740
2741 // calculate and output residuals
2742 if (cPar.a_mode == M_LMM5) {
2743 gsl_vector *Utu_hat = gsl_vector_safe_alloc(Y->size1);
2744 gsl_vector *Ute_hat = gsl_vector_safe_alloc(Y->size1);
2745 gsl_vector *u_hat = gsl_vector_safe_alloc(Y->size1);
2746 gsl_vector *e_hat = gsl_vector_safe_alloc(Y->size1);
2747 gsl_vector *y_hat = gsl_vector_safe_alloc(Y->size1);
2748
2749 // obtain Utu and Ute
2750 gsl_vector_safe_memcpy(y_hat, &UtY_col.vector);
2751 gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
2752
2753 double d, u, e;
2754 for (size_t i = 0; i < eval->size; i++) {
2755 d = gsl_vector_get(eval, i);
2756 u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
2757 gsl_vector_get(y_hat, i);
2758 e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
2759 gsl_vector_set(Utu_hat, i, u);
2760 gsl_vector_set(Ute_hat, i, e);
2761 }
2762
2763 // obtain u and e
2764 gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
2765 gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
2766
2767 // output residuals
2768 cPar.WriteVector(u_hat, "residU");
2769 cPar.WriteVector(e_hat, "residE");
2770
2771 gsl_vector_safe_free(u_hat);
2772 gsl_vector_safe_free(e_hat);
2773 gsl_vector_safe_free(y_hat);
2774 }
2775 }
2776
2777 // Fit LMM or mvLMM (w. LOCO)
2778 if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
2779 cPar.a_mode == 4) {
2780 if (cPar.n_ph == 1) {
2781 LMM cLmm;
2782 cLmm.CopyFromParam(cPar);
2783
2784 // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
2785
2786 gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
2787 gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
2788
2789 if (!cPar.file_bfile.empty()) {
2790 // PLINK analysis
2791 if (cPar.file_gxe.empty()) {
2792 cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
2793 &Y_col.vector, cPar.setGWASnps);
2794 }
2795 else {
2796 cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
2797 &Y_col.vector, env);
2798 }
2799 }
2800 else {
2801 // BIMBAM analysis
2802
2803 if (cPar.file_gxe.empty()) {
2804 cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
2805 &Y_col.vector, cPar.setGWASnps);
2806 } else {
2807 cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
2808 &Y_col.vector, env);
2809 }
2810 }
2811 cLmm.WriteFiles();
2812 cLmm.CopyToParam(cPar);
2813 } else {
2814 MVLMM cMvlmm;
2815 cMvlmm.CopyFromParam(cPar);
2816
2817 // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking
2818
2819 // write(eval,"eval3");
2820
2821 if (!cPar.file_bfile.empty()) {
2822 if (cPar.file_gxe.empty()) {
2823 cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
2824 } else {
2825 cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
2826 }
2827 } else {
2828 if (cPar.file_gxe.empty()) {
2829 cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
2830 } else {
2831 cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
2832 }
2833 }
2834
2835 cMvlmm.WriteFiles();
2836 cMvlmm.CopyToParam(cPar);
2837 }
2838 }
2839 }
2840
2841 // release all matrices and vectors
2842 gsl_matrix_safe_free(Y);
2843 gsl_matrix_safe_free(W);
2844 gsl_matrix_warn_free(B); // sometimes unused
2845 gsl_matrix_warn_free(se_B);
2846 gsl_matrix_warn_free(G);
2847 gsl_matrix_safe_free(U);
2848 gsl_matrix_safe_free(UtW);
2849 gsl_matrix_safe_free(UtY);
2850 gsl_vector_safe_free(eval);
2851 gsl_vector_free(env); // sometimes unused
2852 }
2853
2854 // BSLMM
2855 if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
2856 gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2857 gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
2858 gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
2859 gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
2860
2861 // set covariates matrix W and phenotype vector y
2862 // an intercept should be included in W,
2863 cPar.CopyCvtPhen(W, y, 0);
2864
2865 // center y, even for case/control data
2866 cPar.pheno_mean = CenterVector(y);
2867
2868 // run bvsr if rho==1
2869 if (cPar.rho_min == 1 && cPar.rho_max == 1) {
2870 // read genotypes X (not UtX)
2871 cPar.ReadGenotypes(UtX, G, false);
2872
2873 // perform BSLMM analysis
2874 BSLMM cBslmm;
2875 cBslmm.CopyFromParam(cPar);
2876 time_start = clock();
2877 cBslmm.MCMC(UtX, y);
2878 cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2879 cBslmm.CopyToParam(cPar);
2880 // else, if rho!=1
2881 } else {
2882 gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
2883 gsl_vector *eval = gsl_vector_safe_alloc(y->size);
2884 gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
2885 gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
2886
2887 // read relatedness matrix G
2888 if (!(cPar.file_kin).empty()) {
2889 cPar.ReadGenotypes(UtX, G, false);
2890
2891 // read relatedness matrix G
2892 ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
2893 cPar.k_mode, cPar.error, G);
2894 if (cPar.error == true) {
2895 cout << "error! fail to read kinship/relatedness file. " << endl;
2896 return;
2897 }
2898
2899 // center matrix G
2900 CenterMatrix(G);
2901 validate_K(G);
2902 } else {
2903 cPar.ReadGenotypes(UtX, G, true);
2904 }
2905
2906 // eigen-decomposition and calculate trace_G
2907 cout << "Start Eigen-Decomposition..." << endl;
2908 time_start = clock();
2909
2910 cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
2911
2912 cPar.time_eigen =
2913 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2914
2915 // calculate UtW and Uty
2916 CalcUtX(U, W, UtW);
2917 CalcUtX(U, y, Uty);
2918
2919 // calculate REMLE/MLE estimate and pve
2920 CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
2921 cPar.l_mle_null, cPar.logl_mle_H0);
2922 CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
2923 cPar.l_remle_null, cPar.logl_remle_H0);
2924 CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
2925 cPar.pve_se_null);
2926
2927 cPar.PrintSummary();
2928
2929 // Creat and calcualte UtX, use a large memory
2930 cout << "Calculating UtX..." << endl;
2931 time_start = clock();
2932 CalcUtX(U, UtX);
2933 cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2934
2935 // perform BSLMM or BSLMMDAP analysis
2936 if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
2937 BSLMM cBslmm;
2938 cBslmm.CopyFromParam(cPar);
2939 time_start = clock();
2940 if (cPar.a_mode == 12) { // ridge regression
2941 cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
2942 } else { // Run MCMC
2943 cBslmm.MCMC(U, UtX, Uty, eval, y);
2944 }
2945 cPar.time_opt =
2946 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2947 cBslmm.CopyToParam(cPar);
2948 } else {
2949 }
2950
2951 // release all matrices and vectors
2952 gsl_matrix_safe_free(G);
2953 gsl_matrix_safe_free(U);
2954 gsl_matrix_safe_free(UtW);
2955 gsl_vector_safe_free(eval);
2956 gsl_vector_safe_free(Uty);
2957 }
2958 gsl_matrix_safe_free(W);
2959 gsl_vector_safe_free(y);
2960 gsl_matrix_safe_free(UtX);
2961 }
2962
2963 // BSLMM-DAP
2964 if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
2965 if (cPar.a_mode == 14) {
2966 gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
2967 gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
2968 gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
2969 gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
2970
2971 // set covariates matrix W and phenotype vector y
2972 // an intercept should be included in W,
2973 cPar.CopyCvtPhen(W, y, 0);
2974
2975 // center y, even for case/control data
2976 cPar.pheno_mean = CenterVector(y);
2977
2978 // run bvsr if rho==1
2979 if (cPar.rho_min == 1 && cPar.rho_max == 1) {
2980 // read genotypes X (not UtX)
2981 cPar.ReadGenotypes(UtX, G, false);
2982
2983 // perform BSLMM analysis
2984 BSLMM cBslmm;
2985 cBslmm.CopyFromParam(cPar);
2986 time_start = clock();
2987 cBslmm.MCMC(UtX, y);
2988 cPar.time_opt =
2989 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
2990 cBslmm.CopyToParam(cPar);
2991 // else, if rho!=1
2992 } else {
2993 gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
2994 gsl_vector *eval = gsl_vector_safe_alloc(y->size);
2995 gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
2996 gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
2997
2998 // read relatedness matrix G
2999 if (!(cPar.file_kin).empty()) {
3000 cPar.ReadGenotypes(UtX, G, false);
3001
3002 // read relatedness matrix G
3003 ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
3004 cPar.k_mode, cPar.error, G);
3005 if (cPar.error == true) {
3006 cout << "error! fail to read kinship/relatedness file. " << endl;
3007 return;
3008 }
3009
3010 // center matrix G
3011 CenterMatrix(G);
3012 validate_K(G);
3013
3014 } else {
3015 cPar.ReadGenotypes(UtX, G, true);
3016 }
3017
3018 // eigen-decomposition and calculate trace_G
3019 cout << "Start Eigen-Decomposition..." << endl;
3020 time_start = clock();
3021 cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
3022 cPar.time_eigen =
3023 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3024
3025 // calculate UtW and Uty
3026 CalcUtX(U, W, UtW);
3027 CalcUtX(U, y, Uty);
3028
3029 // calculate REMLE/MLE estimate and pve
3030 CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
3031 cPar.l_mle_null, cPar.logl_mle_H0);
3032 CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
3033 cPar.l_remle_null, cPar.logl_remle_H0);
3034 CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
3035 cPar.pve_se_null);
3036
3037 cPar.PrintSummary();
3038
3039 // Creat and calcualte UtX, use a large memory
3040 cout << "Calculating UtX..." << endl;
3041 time_start = clock();
3042 CalcUtX(U, UtX);
3043 cPar.time_UtX =
3044 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3045
3046 // perform analysis; assume X and y are already centered
3047 BSLMMDAP cBslmmDap;
3048 cBslmmDap.CopyFromParam(cPar);
3049 time_start = clock();
3050 cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
3051 cPar.time_opt =
3052 (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3053 cBslmmDap.CopyToParam(cPar);
3054
3055 // release all matrices and vectors
3056 gsl_matrix_safe_free(G);
3057 gsl_matrix_safe_free(U);
3058 gsl_matrix_safe_free(UtW);
3059 gsl_vector_safe_free(eval);
3060 gsl_vector_safe_free(Uty);
3061 }
3062
3063 gsl_matrix_safe_free(W);
3064 gsl_vector_safe_free(y);
3065 gsl_matrix_safe_free(UtX);
3066 } else if (cPar.a_mode == 15) {
3067 // perform EM algorithm and estimate parameters
3068 vector<string> vec_rs;
3069 vector<double> vec_sa2, vec_sb2, wab;
3070 vector<vector<vector<double>>> BF;
3071
3072 // read hyp and bf files (functions defined in BSLMMDAP)
3073 ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
3074 ReadFile_bf(cPar.file_bf, vec_rs, BF);
3075
3076 cPar.ns_test = vec_rs.size();
3077 if (wab.size() != BF[0][0].size()) {
3078 cout << "error! hyp and bf files dimension do not match" << endl;
3079 }
3080
3081 // load annotations
3082 gsl_matrix *Ac = NULL;
3083 gsl_matrix_int *Ad = NULL;
3084 gsl_vector_int *dlevel = NULL;
3085 size_t kc, kd;
3086 if (!cPar.file_cat.empty()) {
3087 ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
3088 } else {
3089 kc = 0;
3090 kd = 0;
3091 }
3092
3093 cout << "## number of blocks = " << BF.size() << endl;
3094 cout << "## number of analyzed SNPs/var = " << vec_rs.size() << endl;
3095 cout << "## grid size for hyperparameters = " << wab.size() << endl;
3096 cout << "## number of continuous annotations = " << kc << endl;
3097 cout << "## number of discrete annotations = " << kd << endl;
3098
3099 // DAP_EstimateHyper (const size_t kc, const size_t kd, const
3100 // vector<string> &vec_rs, const vector<double> &vec_sa2, const
3101 // vector<double> &vec_sb2, const vector<double> &wab, const
3102 // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int
3103 // *Ad, gsl_vector_int *dlevel);
3104
3105 // perform analysis
3106 BSLMMDAP cBslmmDap;
3107 cBslmmDap.CopyFromParam(cPar);
3108 time_start = clock();
3109 cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
3110 Ad, dlevel);
3111 cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
3112 cBslmmDap.CopyToParam(cPar);
3113
3114 gsl_matrix_safe_free(Ac);
3115 gsl_matrix_int_free(Ad);
3116 gsl_vector_int_free(dlevel);
3117 } else {
3118 //
3119 }
3120 }
3121
3122 cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0);
3123
3124 return;
3125 }
3126
3127 // #include "Eigen/Dense"
3128
WriteLog(int argc,char ** argv,PARAM & cPar)3129 void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
3130 string file_str;
3131 file_str = cPar.path_out + "/" + cPar.file_out;
3132 file_str += ".log.txt";
3133
3134 ofstream outfile(file_str.c_str(), ofstream::out);
3135 if (!outfile) {
3136 cout << "error writing log file: " << file_str.c_str() << endl;
3137 return;
3138 }
3139
3140 outfile << "##" << endl;
3141 outfile << "## GEMMA Version = " << version << " (" << date << ")" << endl;
3142 outfile << "## Build profile = " << GEMMA_PROFILE << endl ;
3143 #ifdef __GNUC__
3144 outfile << "## GCC version = " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ << endl;
3145 #endif
3146 outfile << "## GSL Version = " << GSL_VERSION << endl;
3147 // outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
3148 #ifdef OPENBLAS
3149
3150 #ifndef OPENBLAS_LEGACY
3151 //outfile << "## OpenBlas =" << OPENBLAS_VERSION << " - " << openblas_get_config() << endl;
3152 outfile << "## arch = " << openblas_get_corename() << endl;
3153 outfile << "## threads = " << openblas_get_num_threads() << endl;
3154 #else
3155 outfile << "## OpenBlas = " << openblas_get_config() << endl;
3156 #endif
3157 string* pStr = new string[4] { "sequential", "threaded", "openmp" };
3158 outfile << "## parallel type = " << pStr[openblas_get_parallel()] << endl;
3159 #endif
3160
3161 outfile << "##" << endl;
3162 outfile << "## Command Line Input = ";
3163 for (int i = 0; i < argc; i++) {
3164 outfile << argv[i] << " ";
3165 }
3166 outfile << endl;
3167
3168 outfile << "##" << endl;
3169 time_t rawtime;
3170 time(&rawtime);
3171 tm *ptm = localtime(&rawtime);
3172
3173 outfile << "## Date = " << asctime(ptm);
3174
3175 outfile << "##" << endl;
3176 outfile << "## Summary Statistics:" << endl;
3177 if (!cPar.file_cor.empty() || !cPar.file_study.empty() ||
3178 !cPar.file_mstudy.empty()) {
3179 outfile << "## number of total individuals in the sample = "
3180 << cPar.ni_study << endl;
3181 outfile << "## number of total individuals in the reference = "
3182 << cPar.ni_ref << endl;
3183 outfile << "## number of variance components = " << cPar.n_vc << endl;
3184
3185 outfile << "## pve estimates = ";
3186 for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3187 outfile << " " << cPar.v_pve[i];
3188 }
3189 outfile << endl;
3190
3191 outfile << "## se(pve) = ";
3192 for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3193 outfile << " " << cPar.v_se_pve[i];
3194 }
3195 outfile << endl;
3196 assert(!has_nan(cPar.v_se_pve));
3197
3198 if (cPar.n_vc > 1) {
3199 outfile << "## total pve = " << cPar.pve_total << endl;
3200 outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3201 }
3202
3203 outfile << "## sigma2 per snp = ";
3204 for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3205 outfile << " " << cPar.v_sigma2[i];
3206 }
3207 outfile << endl;
3208
3209 outfile << "## se(sigma2 per snp) = ";
3210 for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3211 outfile << " " << cPar.v_se_sigma2[i];
3212 }
3213 outfile << endl;
3214
3215 outfile << "## enrichment = ";
3216 for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3217 outfile << " " << cPar.v_enrich[i];
3218 }
3219 outfile << endl;
3220
3221 outfile << "## se(enrichment) = ";
3222 for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3223 outfile << " " << cPar.v_se_enrich[i];
3224 }
3225 outfile << endl;
3226 } else if (!cPar.file_beta.empty() &&
3227 (cPar.a_mode == 61 || cPar.a_mode == 62)) {
3228 outfile << "## number of total individuals in the sample = "
3229 << cPar.ni_study << endl;
3230 outfile << "## number of total individuals in the reference = "
3231 << cPar.ni_total << endl;
3232 outfile << "## number of total SNPs/var in the sample = " << cPar.ns_study
3233 << endl;
3234 outfile << "## number of total SNPs/var in the reference panel = "
3235 << cPar.ns_total << endl;
3236 outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3237 outfile << "## number of variance components = " << cPar.n_vc << endl;
3238 } else if (!cPar.file_beta.empty() &&
3239 (cPar.a_mode == 66 || cPar.a_mode == 67)) {
3240 outfile << "## number of total individuals in the sample = "
3241 << cPar.ni_total << endl;
3242 outfile << "## number of total individuals in the reference = "
3243 << cPar.ni_ref << endl;
3244 outfile << "## number of total SNPs/var in the sample = " << cPar.ns_total
3245 << endl;
3246 outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3247 outfile << "## number of variance components = " << cPar.n_vc << endl;
3248
3249 outfile << "## pve estimates = ";
3250 for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3251 outfile << " " << cPar.v_pve[i];
3252 }
3253 outfile << endl;
3254
3255 outfile << "## se(pve) = ";
3256 for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3257 outfile << " " << cPar.v_se_pve[i];
3258 }
3259 outfile << endl;
3260
3261 if (cPar.n_vc > 1) {
3262 outfile << "## total pve = " << cPar.pve_total << endl;
3263 outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3264 }
3265
3266 outfile << "## sigma2 per snp = ";
3267 for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3268 outfile << " " << cPar.v_sigma2[i];
3269 }
3270 outfile << endl;
3271
3272 outfile << "## se(sigma2 per snp) = ";
3273 for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3274 outfile << " " << cPar.v_se_sigma2[i];
3275 }
3276 outfile << endl;
3277
3278 outfile << "## enrichment = ";
3279 for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3280 outfile << " " << cPar.v_enrich[i];
3281 }
3282 outfile << endl;
3283
3284 outfile << "## se(enrichment) = ";
3285 for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3286 outfile << " " << cPar.v_se_enrich[i];
3287 }
3288 outfile << endl;
3289 } else {
3290 outfile << "## number of total individuals = " << cPar.ni_total << endl;
3291
3292 if (cPar.a_mode == 43) {
3293 outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
3294 outfile << "## number of individuals with full phenotypes = "
3295 << cPar.ni_test << endl;
3296 } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
3297 outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
3298 }
3299
3300 outfile << "## number of covariates = " << cPar.n_cvt << endl;
3301 outfile << "## number of phenotypes = " << cPar.n_ph << endl;
3302 if (cPar.a_mode == 43) {
3303 outfile << "## number of observed data = " << cPar.np_obs << endl;
3304 outfile << "## number of missing data = " << cPar.np_miss << endl;
3305 }
3306 if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
3307 cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
3308 cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
3309 outfile << "## number of variance components = " << cPar.n_vc << endl;
3310 }
3311
3312 if (!(cPar.file_gene).empty()) {
3313 outfile << "## number of total genes = " << cPar.ng_total << endl;
3314 outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
3315 } else if (cPar.file_epm.empty()) {
3316 outfile << "## number of total SNPs/var = " << cPar.ns_total << endl;
3317 outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3318 } else {
3319 outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
3320 }
3321
3322 if (cPar.a_mode == 13) {
3323 outfile << "## number of cases = " << cPar.ni_case << endl;
3324 outfile << "## number of controls = " << cPar.ni_control << endl;
3325 }
3326 }
3327
3328 if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
3329 cPar.file_cor.empty() && cPar.file_study.empty() &&
3330 cPar.file_mstudy.empty()) {
3331 // outfile<<"## REMLE log-likelihood in the null model =
3332 //"<<cPar.logl_remle_H0<<endl;
3333 if (cPar.n_ph == 1) {
3334 outfile << "## pve estimates = ";
3335 for (size_t i = 0; i < cPar.v_pve.size(); i++) {
3336 outfile << " " << cPar.v_pve[i];
3337 }
3338 outfile << endl;
3339
3340 outfile << "## se(pve) = ";
3341 for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
3342 outfile << " " << cPar.v_se_pve[i];
3343 }
3344 outfile << endl;
3345
3346 if (cPar.n_vc > 1) {
3347 outfile << "## total pve = " << cPar.pve_total << endl;
3348 outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
3349 }
3350
3351 outfile << "## sigma2 estimates = ";
3352 for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
3353 outfile << " " << cPar.v_sigma2[i];
3354 }
3355 outfile << endl;
3356
3357 outfile << "## se(sigma2) = ";
3358 for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
3359 outfile << " " << cPar.v_se_sigma2[i];
3360 }
3361 outfile << endl;
3362
3363 if (!cPar.file_beta.empty()) {
3364 outfile << "## enrichment = ";
3365 for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
3366 outfile << " " << cPar.v_enrich[i];
3367 }
3368 outfile << endl;
3369
3370 outfile << "## se(enrichment) = ";
3371 for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
3372 outfile << " " << cPar.v_se_enrich[i];
3373 }
3374 outfile << endl;
3375 }
3376 }
3377 }
3378
3379 if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
3380 cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
3381 cPar.a_mode == 12 || cPar.a_mode == 13) {
3382 outfile << "## REMLE log-likelihood in the null model = "
3383 << cPar.logl_remle_H0 << endl;
3384 outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0
3385 << endl;
3386 if (cPar.n_ph == 1) {
3387 // outfile<<"## lambda REMLE estimate in the null (linear mixed) model =
3388 // "<<cPar.l_remle_null<<endl;
3389 // outfile<<"## lambda MLE estimate in the null (linear mixed) model =
3390 // "<<cPar.l_mle_null<<endl;
3391 outfile << "## pve estimate in the null model = " << cPar.pve_null
3392 << endl;
3393 outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl;
3394 outfile << "## vg estimate in the null model = " << cPar.vg_remle_null
3395 << endl;
3396 outfile << "## ve estimate in the null model = " << cPar.ve_remle_null
3397 << endl;
3398 outfile << "## beta estimate in the null model = ";
3399 for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) {
3400 outfile << " " << cPar.beta_remle_null[i];
3401 }
3402 outfile << endl;
3403 outfile << "## se(beta) = ";
3404 for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) {
3405 outfile << " " << cPar.se_beta_remle_null[i];
3406 }
3407 outfile << endl;
3408
3409 } else {
3410 size_t c;
3411 outfile << "## REMLE estimate for Vg in the null model: " << endl;
3412 for (size_t i = 0; i < cPar.n_ph; i++) {
3413 for (size_t j = 0; j <= i; j++) {
3414 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3415 min(i, j);
3416 outfile << tab(j) << cPar.Vg_remle_null[c];
3417 }
3418 outfile << endl;
3419 }
3420 outfile << "## se(Vg): " << endl;
3421 for (size_t i = 0; i < cPar.n_ph; i++) {
3422 for (size_t j = 0; j <= i; j++) {
3423 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3424 min(i, j);
3425 outfile << tab(j) << safe_sqrt(cPar.VVg_remle_null[c]);
3426 }
3427 outfile << endl;
3428 }
3429 outfile << "## REMLE estimate for Ve in the null model: " << endl;
3430 for (size_t i = 0; i < cPar.n_ph; i++) {
3431 for (size_t j = 0; j <= i; j++) {
3432 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3433 min(i, j);
3434 outfile << tab(j) << cPar.Ve_remle_null[c];
3435 }
3436 outfile << endl;
3437 }
3438 outfile << "## se(Ve): " << endl;
3439 for (size_t i = 0; i < cPar.n_ph; i++) {
3440 for (size_t j = 0; j <= i; j++) {
3441 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3442 min(i, j);
3443 outfile << tab(j) << safe_sqrt(cPar.VVe_remle_null[c]);
3444 }
3445 outfile << endl;
3446 }
3447
3448 outfile << "## MLE estimate for Vg in the null model: " << endl;
3449 for (size_t i = 0; i < cPar.n_ph; i++) {
3450 for (size_t j = 0; j < cPar.n_ph; j++) {
3451 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3452 min(i, j);
3453 outfile << tab(j) << cPar.Vg_mle_null[c];
3454 }
3455 outfile << endl;
3456 }
3457 outfile << "## se(Vg): " << endl;
3458 for (size_t i = 0; i < cPar.n_ph; i++) {
3459 for (size_t j = 0; j <= i; j++) {
3460 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3461 min(i, j);
3462 outfile << tab(j) << safe_sqrt(cPar.VVg_mle_null[c]);
3463 }
3464 outfile << endl;
3465 }
3466 outfile << "## MLE estimate for Ve in the null model: " << endl;
3467 for (size_t i = 0; i < cPar.n_ph; i++) {
3468 for (size_t j = 0; j < cPar.n_ph; j++) {
3469 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3470 min(i, j);
3471 outfile << tab(j) << cPar.Ve_mle_null[c];
3472 }
3473 outfile << endl;
3474 }
3475 outfile << "## se(Ve): " << endl;
3476 for (size_t i = 0; i < cPar.n_ph; i++) {
3477 for (size_t j = 0; j <= i; j++) {
3478 c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
3479 min(i, j);
3480 outfile << tab(j) << safe_sqrt(cPar.VVe_mle_null[c]);
3481 }
3482 outfile << endl;
3483 }
3484 outfile << "## estimate for B (d by c) in the null model (columns "
3485 "correspond to the covariates provided in the file): "
3486 << endl;
3487 for (size_t i = 0; i < cPar.n_ph; i++) {
3488 for (size_t j = 0; j < cPar.n_cvt; j++) {
3489 c = i * cPar.n_cvt + j;
3490 outfile << tab(j) << cPar.beta_remle_null[c];
3491 }
3492 outfile << endl;
3493 }
3494 outfile << "## se(B): " << endl;
3495 for (size_t i = 0; i < cPar.n_ph; i++) {
3496 for (size_t j = 0; j < cPar.n_cvt; j++) {
3497 c = i * cPar.n_cvt + j;
3498 outfile << tab(j) << cPar.se_beta_remle_null[c];
3499 }
3500 outfile << endl;
3501 }
3502 }
3503 }
3504
3505 if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 ||
3506 cPar.a_mode == 14 || cPar.a_mode == 16) {
3507 outfile << "## estimated mean = " << cPar.pheno_mean << endl;
3508 }
3509
3510 if (cPar.a_mode == 11 || cPar.a_mode == 13) {
3511 outfile << "##" << endl;
3512 outfile << "## MCMC related:" << endl;
3513 outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl;
3514 outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl;
3515 outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp)
3516 << endl;
3517 outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma
3518 << endl;
3519 outfile << "## random seed = " << cPar.randseed << endl;
3520 outfile << "## acceptance ratio = "
3521 << (double)cPar.n_accept /
3522 (double)((cPar.w_step + cPar.s_step) * cPar.n_mh)
3523 << endl;
3524 }
3525
3526 outfile << "##" << endl;
3527 outfile << "## Computation Time:" << endl;
3528 outfile << "## total computation time = " << cPar.time_total << " min "
3529 << endl;
3530 outfile << "## computation time break down: " << endl;
3531 if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2 || cPar.a_mode == 11 ||
3532 cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) {
3533 outfile << "## time on calculating relatedness matrix = "
3534 << cPar.time_G << " min " << endl;
3535 }
3536 if (cPar.a_mode == M_EIGEN) {
3537 outfile << "## time on eigen-decomposition = " << cPar.time_eigen
3538 << " min " << endl;
3539 }
3540 if (cPar.a_mode == M_LMM1 || cPar.a_mode == M_LMM2 || cPar.a_mode == M_LMM3 ||
3541 cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == 11 ||
3542 cPar.a_mode == 12 || cPar.a_mode == 13 || cPar.a_mode == 14 ||
3543 cPar.a_mode == 16) {
3544 outfile << "## time on eigen-decomposition = " << cPar.time_eigen
3545 << " min " << endl;
3546 outfile << "## time on calculating UtX = " << cPar.time_UtX << " min "
3547 << endl;
3548 }
3549 if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) ||
3550 (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
3551 outfile << "## time on optimization = " << cPar.time_opt << " min "
3552 << endl;
3553 }
3554 if (cPar.a_mode == 11 || cPar.a_mode == 13) {
3555 outfile << "## time on proposal = " << cPar.time_Proposal << " min "
3556 << endl;
3557 outfile << "## time on mcmc = " << cPar.time_opt << " min " << endl;
3558 outfile << "## time on Omega = " << cPar.time_Omega << " min " << endl;
3559 }
3560 if (cPar.a_mode == 41 || cPar.a_mode == 42) {
3561 outfile << "## time on eigen-decomposition = " << cPar.time_eigen
3562 << " min " << endl;
3563 }
3564 if (cPar.a_mode == 43) {
3565 outfile << "## time on eigen-decomposition = " << cPar.time_eigen
3566 << " min " << endl;
3567 outfile << "## time on predicting phenotypes = " << cPar.time_opt
3568 << " min " << endl;
3569 }
3570 outfile << "##" << endl;
3571
3572 outfile.close();
3573 outfile.clear();
3574
3575 info_msg("Done");
3576 return;
3577 }
3578