1 // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 8 -*-
2 //
3 // fastLm.cpp: Rcpp/Eigen example of a simple lm() alternative
4 //
5 // Copyright (C) 2011 - 2015  Douglas Bates, Dirk Eddelbuettel and Romain Francois
6 //
7 // This file is part of RcppEigen.
8 //
9 // RcppEigen is free software: you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License as published by
11 // the Free Software Foundation, either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // RcppEigen is distributed in the hope that it will be useful, but
15 // WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // in file.path(R.home("share"), "licenses").  If not, see
21 // <http://www.gnu.org/licenses/>.
22 
23 #include "fastLm.h"
24 #if !defined(EIGEN_USE_MKL) // don't use R Lapack.h if MKL is enabled
25 #include <R_ext/Lapack.h>
26 #endif
27 
28 namespace lmsol {
29     using Rcpp::_;
30     using Rcpp::as;
31     using Rcpp::CharacterVector;
32     using Rcpp::clone;
33     using Rcpp::List;
34     using Rcpp::NumericMatrix;
35     using Rcpp::NumericVector;
36     using Rcpp::RObject;
37     using Rcpp::wrap;
38 
39     using std::invalid_argument;
40     using std::numeric_limits;
41 
lm(const Map<MatrixXd> & X,const Map<VectorXd> & y)42     lm::lm(const Map<MatrixXd> &X, const Map<VectorXd> &y)
43 	: m_X(X),
44 	  m_y(y),
45 	  m_n(X.rows()),
46 	  m_p(X.cols()),
47 	  m_coef(VectorXd::Constant(m_p, ::NA_REAL)),
48 	  m_r(::NA_INTEGER),
49 	  m_fitted(m_n),
50 	  m_se(VectorXd::Constant(m_p, ::NA_REAL)),
51 	  m_usePrescribedThreshold(false) {
52     }
53 
setThreshold(const RealScalar & threshold)54     lm& lm::setThreshold(const RealScalar& threshold) {
55 	m_usePrescribedThreshold = true;
56 	m_prescribedThreshold = threshold;
57 	return *this;
58     }
59 
Dplus(const ArrayXd & d)60     inline ArrayXd lm::Dplus(const ArrayXd& d) {
61 	ArrayXd   di(d.size());
62 	double  comp(d.maxCoeff() * threshold());
63 	for (int j = 0; j < d.size(); ++j) di[j] = (d[j] < comp) ? 0. : 1./d[j];
64 	m_r          = (di != 0.).count();
65 	return di;
66     }
67 
XtX() const68     MatrixXd lm::XtX() const {
69 	return MatrixXd(m_p, m_p).setZero().selfadjointView<Lower>().
70 	    rankUpdate(m_X.adjoint());
71     }
72 
73     /** Returns the threshold that will be used by certain methods such as rank().
74      *
75      *  The default value comes from experimenting (see "LU precision
76      *  tuning" thread on the Eigen list) and turns out to be
77      *  identical to Higham's formula used already in LDLt.
78      *
79      *  @return The user-prescribed threshold or the default.
80      */
threshold() const81     RealScalar lm::threshold() const {
82 	return m_usePrescribedThreshold ? m_prescribedThreshold
83 	    : numeric_limits<double>::epsilon() * m_p;
84     }
85 
ColPivQR(const Map<MatrixXd> & X,const Map<VectorXd> & y)86     ColPivQR::ColPivQR(const Map<MatrixXd> &X, const Map<VectorXd> &y)
87 	: lm(X, y) {
88 	ColPivHouseholderQR<MatrixXd> PQR(X); // decompose the model matrix
89 	Permutation                  Pmat(PQR.colsPermutation());
90 	m_r                               = PQR.rank();
91 	if (m_r == m_p) {	// full rank case
92 	    m_coef     = PQR.solve(y);
93 	    m_fitted   = X * m_coef;
94 	    m_se       = Pmat * PQR.matrixQR().topRows(m_p).
95 		triangularView<Upper>().solve(I_p()).rowwise().norm();
96 	    return;
97 	}
98 	MatrixXd                     Rinv(PQR.matrixQR().topLeftCorner(m_r, m_r).
99 					  triangularView<Upper>().
100 					  solve(MatrixXd::Identity(m_r, m_r)));
101 	VectorXd                  effects(PQR.householderQ().adjoint() * y);
102 	m_coef.head(m_r)                  = Rinv * effects.head(m_r);
103 	m_coef                            = Pmat * m_coef;
104 				// create fitted values from effects
105 				// (can't use X*m_coef if X is rank-deficient)
106 	effects.tail(m_n - m_r).setZero();
107 	m_fitted                          = PQR.householderQ() * effects;
108 	m_se.head(m_r)                    = Rinv.rowwise().norm();
109 	m_se                              = Pmat * m_se;
110     }
111 
QR(const Map<MatrixXd> & X,const Map<VectorXd> & y)112     QR::QR(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
113 	HouseholderQR<MatrixXd> QR(X);
114 	m_coef                     = QR.solve(y);
115 	m_fitted                   = X * m_coef;
116 	m_se                       = QR.matrixQR().topRows(m_p).
117 	    triangularView<Upper>().solve(I_p()).rowwise().norm();
118     }
119 
120 
Llt(const Map<MatrixXd> & X,const Map<VectorXd> & y)121     Llt::Llt(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
122 	LLT<MatrixXd>  Ch(XtX().selfadjointView<Lower>());
123 	m_coef            = Ch.solve(X.adjoint() * y);
124 	m_fitted          = X * m_coef;
125 	m_se              = Ch.matrixL().solve(I_p()).colwise().norm();
126     }
127 
Ldlt(const Map<MatrixXd> & X,const Map<VectorXd> & y)128     Ldlt::Ldlt(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
129 	LDLT<MatrixXd> Ch(XtX().selfadjointView<Lower>());
130 	Dplus(Ch.vectorD());	// to set the rank
131 //FIXME: Check on the permutation in the LDLT and incorporate it in
132 //the coefficients and the standard error computation.
133 //	m_coef            = Ch.matrixL().adjoint().
134 //	    solve(Dplus(D) * Ch.matrixL().solve(X.adjoint() * y));
135 	m_coef            = Ch.solve(X.adjoint() * y);
136 	m_fitted          = X * m_coef;
137 	m_se              = Ch.solve(I_p()).diagonal().array().sqrt();
138     }
139 
gesdd(MatrixXd & A,ArrayXd & S,MatrixXd & Vt)140     int gesdd(MatrixXd& A, ArrayXd& S, MatrixXd& Vt) {
141 	int info, mone = -1, m = A.rows(), n = A.cols();
142 	std::vector<int> iwork(8 * n);
143 	double wrk;
144 	if (m < n || S.size() != n || Vt.rows() != n || Vt.cols() != n)
145 	    throw std::invalid_argument("dimension mismatch in gesvd");
146 	F77_CALL(dgesdd)("O", &m, &n, A.data(), &m, S.data(), A.data(),
147 			 &m, Vt.data(), &n, &wrk, &mone, &iwork[0], &info);
148 	int lwork(wrk);
149 	std::vector<double> work(lwork);
150 	F77_CALL(dgesdd)("O", &m, &n, A.data(), &m, S.data(), A.data(),
151 			 &m, Vt.data(), &n, &work[0], &lwork, &iwork[0], &info);
152 	return info;
153     }
154 
GESDD(const Map<MatrixXd> & X,const Map<VectorXd> & y)155     GESDD::GESDD(const Map<MatrixXd>& X, const Map<VectorXd> &y) : lm(X, y) {
156 	MatrixXd   U(X), Vt(m_p, m_p);
157 	ArrayXd   S(m_p);
158 	if (gesdd(U, S, Vt)) throw std::runtime_error("error in gesdd");
159 	MatrixXd VDi(Vt.adjoint() * Dplus(S).matrix().asDiagonal());
160 	m_coef      = VDi * U.adjoint() * y;
161 	m_fitted    = X * m_coef;
162 	m_se        = VDi.rowwise().norm();
163     }
164 
SVD(const Map<MatrixXd> & X,const Map<VectorXd> & y)165     SVD::SVD(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
166 	JacobiSVD<MatrixXd>  UDV(X.jacobiSvd(ComputeThinU|ComputeThinV));
167 	MatrixXd             VDi(UDV.matrixV() *
168 				 Dplus(UDV.singularValues().array()).matrix().asDiagonal());
169 	m_coef                   = VDi * UDV.matrixU().adjoint() * y;
170 	m_fitted                 = X * m_coef;
171 	m_se                     = VDi.rowwise().norm();
172     }
173 
SymmEigen(const Map<MatrixXd> & X,const Map<VectorXd> & y)174     SymmEigen::SymmEigen(const Map<MatrixXd> &X, const Map<VectorXd> &y)
175 	: lm(X, y) {
176 	SelfAdjointEigenSolver<MatrixXd> eig(XtX().selfadjointView<Lower>());
177 	MatrixXd   VDi(eig.eigenvectors() *
178 		       Dplus(eig.eigenvalues().array()).sqrt().matrix().asDiagonal());
179 	m_coef         = VDi * VDi.adjoint() * X.adjoint() * y;
180 	m_fitted       = X * m_coef;
181 	m_se           = VDi.rowwise().norm();
182     }
183 
184     enum {ColPivQR_t = 0, QR_t, LLT_t, LDLT_t, SVD_t, SymmEigen_t, GESDD_t};
185 
do_lm(const Map<MatrixXd> & X,const Map<VectorXd> & y,int type)186     static inline lm do_lm(const Map<MatrixXd> &X, const Map<VectorXd> &y, int type) {
187 	switch(type) {
188 	case ColPivQR_t:
189 	    return ColPivQR(X, y);
190 	case QR_t:
191 	    return QR(X, y);
192 	case LLT_t:
193 	    return Llt(X, y);
194 	case LDLT_t:
195 	    return Ldlt(X, y);
196 	case SVD_t:
197 	    return SVD(X, y);
198 	case SymmEigen_t:
199 	    return SymmEigen(X, y);
200 	case GESDD_t:
201 	    return GESDD(X, y);
202 	}
203 	throw invalid_argument("invalid type");
204 	return ColPivQR(X, y);	// -Wall
205     }
206 
fastLm(Rcpp::NumericMatrix Xs,Rcpp::NumericVector ys,int type)207     List fastLm(Rcpp::NumericMatrix Xs, Rcpp::NumericVector ys, int type) {
208         const Map<MatrixXd>  X(as<Map<MatrixXd> >(Xs));
209         const Map<VectorXd>  y(as<Map<VectorXd> >(ys));
210         Index                n = X.rows();
211         if ((Index)y.size() != n) throw invalid_argument("size mismatch");
212 
213 			    // Select and apply the least squares method
214         lm                 ans(do_lm(X, y, type));
215 
216 			    // Copy coefficients and install names, if any
217         NumericVector     coef(wrap(ans.coef()));
218 
219         List          dimnames(NumericMatrix(Xs).attr("dimnames"));
220         if (dimnames.size() > 1) {
221             RObject   colnames = dimnames[1];
222             if (!(colnames).isNULL())
223                 coef.attr("names") = clone(CharacterVector(colnames));
224         }
225 
226         VectorXd         resid = y - ans.fitted();
227         int               rank = ans.rank();
228         int                 df = (rank == ::NA_INTEGER) ? n - X.cols() : n - rank;
229         double               s = resid.norm() / std::sqrt(double(df));
230 			    // Create the standard errors
231         VectorXd            se = s * ans.se();
232 
233         return List::create(_["coefficients"]  = coef,
234                             _["se"]            = se,
235                             _["rank"]          = rank,
236                             _["df.residual"]   = df,
237                             _["residuals"]     = resid,
238                             _["s"]             = s,
239                             _["fitted.values"] = ans.fitted());
240     }
241 }
242 
243 // This defines the R-callable function 'fastLm'
244 // [[Rcpp::export]]
fastLm_Impl(Rcpp::NumericMatrix X,Rcpp::NumericVector y,int type)245 Rcpp::List fastLm_Impl(Rcpp::NumericMatrix X, Rcpp::NumericVector y, int type) {
246     return lmsol::fastLm(X, y, type);
247 }
248 
249