1 /*
2    This file is part of the BOLT-LMM linear mixed model software package
3    developed by Po-Ru Loh.  Copyright (C) 2014-2019 Harvard University.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #ifndef BOLTPARAMS_HPP
20 #define BOLTPARAMS_HPP
21 
22 #include <vector>
23 #include <string>
24 #include <utility>
25 
26 #include <boost/program_options.hpp>
27 #include <boost/numeric/ublas/matrix.hpp>
28 
29 #include "DataMatrix.hpp"
30 
31 namespace LMM {
32 
33   class BoltParams {
34   public:
35 
36     static const double MIX_PARAM_ESTIMATE_FLAG; // flag for estimating mixture params using CV
37 
38     // main input files
39     std::string famFile;
40     std::vector <std::string> bimFiles, bedFiles; // possibly multiple files and/or range templates
41 
42     // optional reference map file for filling in genpos
43     std::string geneticMapFile;
44 
45     std::vector <std::string> removeFiles; // list(s) of indivs to remove
46     std::vector <std::string> excludeFiles; // list(s) of SNPs to exclude
47     std::vector <std::string> modelSnpsFiles; // list(s) of SNPs to use in model (i.e., GRM)
48 
49     // QC params
50     double maxMissingPerSnp, maxMissingPerIndiv;
51 
52     bool noMapCheck; // disable automatic check of genetic map scale
53     int maxModelSnps; // error-check to discourage use of too many snps (e.g., imputed)
54 
55     // for real phenotype input
56     std::string phenoFile;
57     std::vector <std::string> phenoCols;
58     bool phenoUseFam;
59 
60     // for real covariate input
61     std::string covarFile;
62     std::vector < std::pair <std::string, DataMatrix::ValueType> > covarCols;
63     int covarMaxLevels;
64     bool covarUseMissingIndic;
65 
66     // for analysis
67     bool reml; // flag to run variance components analysis (automatic if computing assoc stats)
68     bool lmmInf, lmmBayes, lmmBayesMCMC, lmmForceNonInf;
69     double h2gGuess;
70     int MCMCiters;
71     int numLeaveOutChunks;
72     int numCalibSnps;
73 
74     double pEst, varFrac2Est;
75     int CVfoldsSplit, CVfoldsCompute;
76     bool CVnoEarlyExit;
77 
78     int h2EstMCtrials, reEstMCtrials;
79     int remlMCtrials;
80     bool remlNoRefine;
81     std::vector < boost::numeric::ublas::matrix <double> > remlGuessVegs;
82     std::vector <std::string> remlGuessVCnames;
83     bool runUnivarRemls;
84     bool allowh2g01;
85 
86     // for avoiding proximal contamination
87     double genWindow; int physWindow;
88 
89     // for calibration of lmmBayes[MCMC]
90     std::string LDscoresFile, LDscoresCol, LDscoresChipCol;
91     bool LDscoresUseChip;
92     bool LDscoresMatchBp;
93 
94     // for stopping algorithm
95     int maxIters;
96     double CGtol, approxLLtol;
97 
98     int mBlockMultX, Nautosomes;
99 
100     int numThreads;
101 
102     // for final output
103     std::string statsFile;
104     bool verboseStats;
105     std::string predBetasFile; // for (Bayesian) MLMi prediction
106 
107     // for dosage-format imputed SNPs
108     std::vector <std::string> dosageFiles;
109     std::string dosageFidIidFile;
110     std::string statsFileDosageSnps;
111     bool noDosageIDcheck;
112     bool noDosage2IDcheck;
113     bool noImpute2IDcheck;
114     bool noBgenIDcheck;
115 
116     std::vector <std::string> dosage2MapFiles, dosage2GenoFiles;
117     std::string statsFileDosage2Snps;
118 
119     std::vector <std::string> impute2Files;
120     std::vector <int> impute2Chroms;
121     std::string impute2FidIidFile;
122     std::string statsFileImpute2Snps;
123     double impute2MinMAF;
124 
125     std::vector <std::string> bgenFiles;
126     std::vector <std::string> sampleFiles;
127     std::string statsFileBgenSnps;
128     std::vector <int> bgenLayouts; // 1 = v1.1, 2 = v1.2
129     double bgenMinMAF, bgenMinINFO;
130     bool domRecHetTest;
131 
132     // for output of simulated betas and chip LD Scores
133     std::string snpInfoFile;
134 
135     // for PhenoBuilder
136     uint seed;
137     std::string MAFhistFile;
138     int Mcausal, Mcandidate;
139     double stdPow;
140     int highH2ChromMax;
141     int midChromHalfBufferPhyspos;
142     std::string phenoStratFile;
143     std::vector <double> h2causal;
144     double h2candidate, h2strat;
145     double lambdaRegion, pRegion;
146     std::string phenoOutFile;
147     int effectDist;
148 
149     // populates members; error-checks
150     bool processCommandLineArgs(int argc, char *argv[]);
151   };
152 }
153 
154 #endif
155