1 /* 2 This file is part of the BOLT-LMM linear mixed model software package 3 developed by Po-Ru Loh. Copyright (C) 2014-2019 Harvard University. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #ifndef PHENOBUILDER_HPP 20 #define PHENOBUILDER_HPP 21 22 #include <vector> 23 #include <string> 24 #include <set> 25 #include <boost/random.hpp> 26 #include <boost/random/mersenne_twister.hpp> 27 #include <boost/random/normal_distribution.hpp> 28 #include <boost/random/uniform_01.hpp> 29 30 #include "Bolt.hpp" 31 32 namespace LMM { 33 34 class PhenoBuilder { 35 36 public: 37 38 enum SnpRegionType { 39 CAUSAL_REGION, // chrom 1st-halves (pre-buffer): used in LD Score regression in simulations 40 CANDIDATE_SNP, // simulated candidate snp with fixed large effect sizes 41 BUFFER_REGION, // buffer in middle of chrom (not used for anything) 42 NULL_REGION_H2HI_CHROMS, // 2nd-halves (post-buffer) of hi-h2 chroms: used to check avg null 43 NULL_REGION_H2LO_CHROMS, // 2nd-halves (post-buffer) of lo-h2 chroms: used to check avg null 44 BAD_REGION // masked SNPs (not used for anything) 45 }; 46 enum EffectDistType { 47 GAUSSIAN, 48 LAPLACE 49 }; 50 51 private: 52 53 const Bolt &bolt; 54 uint64 Nstride; 55 EffectDistType effectDist; 56 57 boost::mt19937 rng; 58 boost::variate_generator<boost::mt19937&, boost::normal_distribution<> > randn; 59 boost::variate_generator<boost::mt19937&, boost::uniform_01<> > rand; 60 boost::variate_generator<boost::mt19937&, boost::exponential_distribution<> > exprnd; 61 62 // build masked, mean-centered, normalized genetic component of phenotype from input betas 63 std::vector <double> buildPhenoGenetic(const std::vector <double> &betas); 64 void maskCenterNormalize(std::vector <double> &pheno); 65 66 public: 67 68 static const int DEFAULT_MID_CHROM_HALF_BUFFER_PHYSPOS; 69 static const double DEFAULT_MID_CHROM_HALF_BUFFER_GENPOS; 70 71 // note: the Bolt instance should apply any additional masking needed (indivs, snps) 72 PhenoBuilder(const Bolt &_bolt, uint seed, EffectDistType _effectDist); 73 74 static bool isNullRegion(SnpRegionType snpRegion); 75 76 /** 77 * build masked, mean-centered, normalized genetic component of phenotype 78 * 79 * params: 80 * - pCausal: prob of a 1st-half chrom being causal, BEFORE further reduction from MAF matching 81 * - MAFhistFile: file containing allele freq spectrum to match 82 * - stdPow: MAF-dependence; 0 for equal per-normalized-genotype, 1 for equal per-allele 83 */ 84 std::vector <double> genPhenoCausal(std::vector <double> &simBetas, 85 std::vector <SnpRegionType> &simRegions, int vcNum, 86 int Mcausal, const std::string &MAFhistFile, double stdPow, 87 std::set <int> highH2Chroms, 88 int midChromHalfBufferPhyspos); 89 90 std::vector <double> genPhenoCausalRegions(std::vector <double> &simBetas, 91 std::vector <SnpRegionType> &simRegions, 92 double lambdaRegion, double pRegion); 93 /** 94 * build masked, mean-centered, normalized candidate snp genetic component of phenotype 95 * update simRegions accordingly to identify chosen candidate snps 96 */ 97 std::vector <double> genPhenoCandidate(std::vector <SnpRegionType> &simRegions, 98 int Mcandidate); 99 100 // build masked, mean-centered, normalized stratification component of phenotype 101 std::vector <double> genPhenoStrat(const std::string &phenoStratFile); 102 103 /** 104 * build masked, mean-centered, normalized relatedness component of phenotype 105 * - input matrix will be N x N 106 * - also input ibd thresh 107 * - mask (i,j) entries for i!=j, at least one of i, j in masked-out indivs 108 * - cholesky factor 109 * - create random pheno 110 * - mask, mean-center, normalize 111 // input file: IBS mat (unmasked: N x N for N indivs in fam file... need to augment if < Nstride) 112 // compute IBS mat with GCTA; only do this for FHS testing on relatedness 113 vector <double> genPhenoRelated(const string &IBSfile, double IBSthresh) { 114 vector <double> pheno(Nstride); 115 // todo (later) 116 maskCenterNormalize(pheno); return pheno; 117 } 118 */ 119 120 // build masked, mean-centered, normalized environmental (random) component of phenotype 121 std::vector <double> genPhenoEnv(void); 122 123 std::vector <double> combinePhenoComps 124 (const std::vector <double> &h2causal, double h2candidate, double h2strat, 125 const std::vector < std::vector <double> > &phenoCausal, 126 const std::vector <double> &phenoCandidate, const std::vector <double> &phenoStrat, 127 const std::vector <double> &phenoEnv); 128 }; 129 } 130 131 #endif 132