1 /*
2    This file is part of the BOLT-LMM linear mixed model software package
3    developed by Po-Ru Loh.  Copyright (C) 2014-2019 Harvard University.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #ifndef BOLTPARESTCV_HPP
20 #define BOLTPARESTCV_HPP
21 
22 #include <vector>
23 #include <string>
24 #include <utility>
25 #include <boost/utility.hpp>
26 
27 #include "Bolt.hpp"
28 #include "DataMatrix.hpp"
29 #include "SnpData.hpp"
30 
31 namespace LMM {
32 
33   class BoltParEstCV : boost::noncopyable {
34 
35   private:
36     const SnpData &snpData;
37     const DataMatrix &covarDataT; // transposed covariate data matrix
38     const Bolt bolt; // analyses will be performed on indivs in bolt.getMaskIndivs()
39     const std::vector < std::pair <std::string, DataMatrix::ValueType> > covars;
40     const bool covarUseMissingIndic;
41 
42     struct ParamData {
43       double f2;
44       double p;
45       std::vector <double> PVEs, MSEs;
46       ParamData(double _f2, double _p);
47       bool operator < (const ParamData &paramData2) const;
48     };
49 
50   public:
51 
52     BoltParEstCV(const SnpData& _snpData, const DataMatrix& _covarDataT,
53 		 const double subMaskIndivs[],
54 		 const std::vector < std::pair <std::string, DataMatrix::ValueType> > &_covars,
55 		 int covarMaxLevels, bool missingIndicator, int mBlockMultX, int Nautosomes);
56 
57     /**
58      * (f2, p) parameter estimation via cross-validation
59      * - after each fold, compare PVEs of putative (f2, p) param pairs
60      * - eliminate clearly suboptimal param pairs from future folds
61      * - stop when only one param pair left
62      *
63      * return: iterations used in last CV fold
64      */
65     int estMixtureParams
66     (double *f2Est, double *pEst, double *predBoost, const std::vector <double> &pheno,
67      double logDeltaEst, double sigma2Kest, int CVfoldsSplit, int CVfoldsCompute,
68      bool CVnoEarlyExit, double predBoostMin, bool MCMC, int maxIters, double approxLLtol,
69      int mBlockMultX, int Nautosomes) const;
70 
71     // for use in PhenoBuilder to generate random phenotypes
72     const Bolt &getBoltRef(void) const;
73 
74   };
75 }
76 
77 #endif
78