1 /* 2 This file is part of the BOLT-LMM linear mixed model software package 3 developed by Po-Ru Loh. Copyright (C) 2014-2019 Harvard University. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #ifndef COVARIATEBASIS_HPP 20 #define COVARIATEBASIS_HPP 21 22 #include <vector> 23 #include <string> 24 #include <utility> 25 #include <boost/utility.hpp> 26 27 #include "DataMatrix.hpp" 28 29 namespace LMM { 30 31 class CovariateBasis : boost::noncopyable { 32 33 private: 34 uint64 C; // number of covariates (including all-1s) chosen from DataMatrix of covariates 35 uint64 Cindep; // number of independent covariates 36 uint64 Nstride; // minor dimension of maskedCovars and basis arrays (for data layout) 37 uint64 Nused; // popcnt(maskIndivs) 38 39 // beyond input _maskIndivs, samples with missing covariates are masked if !covarUseMissingIndic 40 double *maskIndivs; // [VECTOR]: 0-1 vector of length Nstride 41 double *basis; // [[MATRIX]]: C x Nstride; normalized to have vector norm 1 (unlike snps) 42 double *basisExtAllIndivs; // [[MATRIX]]: Cindep x Nstride; basis "extended" for OOS prediction 43 44 public: 45 static const double MIN_REL_SINGULAR_VALUE; 46 47 CovariateBasis(const DataMatrix &covarDataT, const double _maskIndivs[], 48 const std::vector < std::pair <std::string, DataMatrix::ValueType> > &covars, 49 int covarMaxLevels, bool covarUseMissingIndic); 50 51 /** 52 * input: 53 * - covarDataT is assumed to contain the all-1s vector as its first row (indexed 0) 54 * - _maskIndivs has dimension >= covarDataT.ncols = N (if > N, additional cols are masked) 55 * - assumed to be a subset of maskIndivs from original SnpData instance 56 * - (presumably obtained by using SnpData.writeMaskIndivs and taking a subset) 57 * action: 58 * - copies _maskIndivs into member maskIndivs (and optionally performs missing masking) 59 * - builds masked copy of selected covariates (from covarDataT) in maskedCovars[C x Nstride] 60 * - (possible later use: DGELS to get least squares coeffs wrt original covars) 61 * - mean-fills missing covariate values (using mean of non-masked, non-missing) 62 * - computes SVD; stores in basis[C x Nstride] and sets Cindep (warns if < C) 63 */ 64 void init(const DataMatrix &covarDataT, const double _maskIndivs[], 65 std::vector < std::pair <std::string, DataMatrix::ValueType> > covars, 66 int covarMaxLevels, bool covarUseMissingIndic); 67 68 ~CovariateBasis(); 69 70 //uint64 getC(void) const { return C; } -- don't expose this! 71 uint64 getCindep(void) const; 72 const double *getMaskIndivs(void) const; 73 uint64 getNused(void) const; 74 const double *getBasis(bool extAllIndivs) const; 75 76 // vec is assumed to have dimension Nstride 77 // don't assume memory alignment (no SSE) 78 void applyMaskIndivs(double vec[]) const; 79 80 /** 81 * Cindep values will be written to out[] 82 * vec[] has size Nstride 83 */ 84 void computeCindepComponents(double out[], const double vec[]) const; 85 86 /** 87 * vec[] has size Nstride 88 * don't assume memory alignment (no SSE) 89 */ 90 void projectCovars(double vec[]) const; 91 92 // debugging function 93 void printProj(const double vec[], const char name[]) const; 94 95 }; 96 } 97 98 #endif 99