1 /* 2 This file is part of the BOLT-LMM linear mixed model software package 3 developed by Po-Ru Loh. Copyright (C) 2014-2019 Harvard University. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #ifndef BOLTPARAMS_HPP 20 #define BOLTPARAMS_HPP 21 22 #include <vector> 23 #include <string> 24 #include <utility> 25 26 #include <boost/program_options.hpp> 27 #include <boost/numeric/ublas/matrix.hpp> 28 29 #include "DataMatrix.hpp" 30 31 namespace LMM { 32 33 class BoltParams { 34 public: 35 36 static const double MIX_PARAM_ESTIMATE_FLAG; // flag for estimating mixture params using CV 37 38 // main input files 39 std::string famFile; 40 std::vector <std::string> bimFiles, bedFiles; // possibly multiple files and/or range templates 41 42 // optional reference map file for filling in genpos 43 std::string geneticMapFile; 44 45 std::vector <std::string> removeFiles; // list(s) of indivs to remove 46 std::vector <std::string> excludeFiles; // list(s) of SNPs to exclude 47 std::vector <std::string> modelSnpsFiles; // list(s) of SNPs to use in model (i.e., GRM) 48 49 // QC params 50 double maxMissingPerSnp, maxMissingPerIndiv; 51 52 bool noMapCheck; // disable automatic check of genetic map scale 53 int maxModelSnps; // error-check to discourage use of too many snps (e.g., imputed) 54 55 // for real phenotype input 56 std::string phenoFile; 57 std::vector <std::string> phenoCols; 58 bool phenoUseFam; 59 60 // for real covariate input 61 std::string covarFile; 62 std::vector < std::pair <std::string, DataMatrix::ValueType> > covarCols; 63 int covarMaxLevels; 64 bool covarUseMissingIndic; 65 66 // for analysis 67 bool reml; // flag to run variance components analysis (automatic if computing assoc stats) 68 bool lmmInf, lmmBayes, lmmBayesMCMC, lmmForceNonInf; 69 double h2gGuess; 70 int MCMCiters; 71 int numLeaveOutChunks; 72 int numCalibSnps; 73 74 double pEst, varFrac2Est; 75 int CVfoldsSplit, CVfoldsCompute; 76 bool CVnoEarlyExit; 77 78 int h2EstMCtrials, reEstMCtrials; 79 int remlMCtrials; 80 bool remlNoRefine; 81 std::vector < boost::numeric::ublas::matrix <double> > remlGuessVegs; 82 std::vector <std::string> remlGuessVCnames; 83 bool runUnivarRemls; 84 bool allowh2g01; 85 86 // for avoiding proximal contamination 87 double genWindow; int physWindow; 88 89 // for calibration of lmmBayes[MCMC] 90 std::string LDscoresFile, LDscoresCol, LDscoresChipCol; 91 bool LDscoresUseChip; 92 bool LDscoresMatchBp; 93 94 // for stopping algorithm 95 int maxIters; 96 double CGtol, approxLLtol; 97 98 int mBlockMultX, Nautosomes; 99 100 int numThreads; 101 102 // for final output 103 std::string statsFile; 104 bool verboseStats; 105 std::string predBetasFile; // for (Bayesian) MLMi prediction 106 107 // for dosage-format imputed SNPs 108 std::vector <std::string> dosageFiles; 109 std::string dosageFidIidFile; 110 std::string statsFileDosageSnps; 111 bool noDosageIDcheck; 112 bool noDosage2IDcheck; 113 bool noImpute2IDcheck; 114 bool noBgenIDcheck; 115 116 std::vector <std::string> dosage2MapFiles, dosage2GenoFiles; 117 std::string statsFileDosage2Snps; 118 119 std::vector <std::string> impute2Files; 120 std::vector <int> impute2Chroms; 121 std::string impute2FidIidFile; 122 std::string statsFileImpute2Snps; 123 double impute2MinMAF; 124 125 std::vector <std::string> bgenFiles; 126 std::vector <std::string> sampleFiles; 127 std::string statsFileBgenSnps; 128 std::vector <int> bgenLayouts; // 1 = v1.1, 2 = v1.2 129 double bgenMinMAF, bgenMinINFO; 130 bool domRecHetTest; 131 132 // for output of simulated betas and chip LD Scores 133 std::string snpInfoFile; 134 135 // for PhenoBuilder 136 uint seed; 137 std::string MAFhistFile; 138 int Mcausal, Mcandidate; 139 double stdPow; 140 int highH2ChromMax; 141 int midChromHalfBufferPhyspos; 142 std::string phenoStratFile; 143 std::vector <double> h2causal; 144 double h2candidate, h2strat; 145 double lambdaRegion, pRegion; 146 std::string phenoOutFile; 147 int effectDist; 148 149 // populates members; error-checks 150 bool processCommandLineArgs(int argc, char *argv[]); 151 }; 152 } 153 154 #endif 155