1 // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 8 -*-
2 //
3 // fastLm.cpp: Rcpp/Eigen example of a simple lm() alternative
4 //
5 // Copyright (C) 2011 - 2015 Douglas Bates, Dirk Eddelbuettel and Romain Francois
6 //
7 // This file is part of RcppEigen.
8 //
9 // RcppEigen is free software: you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License as published by
11 // the Free Software Foundation, either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // RcppEigen is distributed in the hope that it will be useful, but
15 // WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // in file.path(R.home("share"), "licenses"). If not, see
21 // <http://www.gnu.org/licenses/>.
22
23 #include "fastLm.h"
24 #if !defined(EIGEN_USE_MKL) // don't use R Lapack.h if MKL is enabled
25 #include <R_ext/Lapack.h>
26 #endif
27
28 namespace lmsol {
29 using Rcpp::_;
30 using Rcpp::as;
31 using Rcpp::CharacterVector;
32 using Rcpp::clone;
33 using Rcpp::List;
34 using Rcpp::NumericMatrix;
35 using Rcpp::NumericVector;
36 using Rcpp::RObject;
37 using Rcpp::wrap;
38
39 using std::invalid_argument;
40 using std::numeric_limits;
41
lm(const Map<MatrixXd> & X,const Map<VectorXd> & y)42 lm::lm(const Map<MatrixXd> &X, const Map<VectorXd> &y)
43 : m_X(X),
44 m_y(y),
45 m_n(X.rows()),
46 m_p(X.cols()),
47 m_coef(VectorXd::Constant(m_p, ::NA_REAL)),
48 m_r(::NA_INTEGER),
49 m_fitted(m_n),
50 m_se(VectorXd::Constant(m_p, ::NA_REAL)),
51 m_usePrescribedThreshold(false) {
52 }
53
setThreshold(const RealScalar & threshold)54 lm& lm::setThreshold(const RealScalar& threshold) {
55 m_usePrescribedThreshold = true;
56 m_prescribedThreshold = threshold;
57 return *this;
58 }
59
Dplus(const ArrayXd & d)60 inline ArrayXd lm::Dplus(const ArrayXd& d) {
61 ArrayXd di(d.size());
62 double comp(d.maxCoeff() * threshold());
63 for (int j = 0; j < d.size(); ++j) di[j] = (d[j] < comp) ? 0. : 1./d[j];
64 m_r = (di != 0.).count();
65 return di;
66 }
67
XtX() const68 MatrixXd lm::XtX() const {
69 return MatrixXd(m_p, m_p).setZero().selfadjointView<Lower>().
70 rankUpdate(m_X.adjoint());
71 }
72
73 /** Returns the threshold that will be used by certain methods such as rank().
74 *
75 * The default value comes from experimenting (see "LU precision
76 * tuning" thread on the Eigen list) and turns out to be
77 * identical to Higham's formula used already in LDLt.
78 *
79 * @return The user-prescribed threshold or the default.
80 */
threshold() const81 RealScalar lm::threshold() const {
82 return m_usePrescribedThreshold ? m_prescribedThreshold
83 : numeric_limits<double>::epsilon() * m_p;
84 }
85
ColPivQR(const Map<MatrixXd> & X,const Map<VectorXd> & y)86 ColPivQR::ColPivQR(const Map<MatrixXd> &X, const Map<VectorXd> &y)
87 : lm(X, y) {
88 ColPivHouseholderQR<MatrixXd> PQR(X); // decompose the model matrix
89 Permutation Pmat(PQR.colsPermutation());
90 m_r = PQR.rank();
91 if (m_r == m_p) { // full rank case
92 m_coef = PQR.solve(y);
93 m_fitted = X * m_coef;
94 m_se = Pmat * PQR.matrixQR().topRows(m_p).
95 triangularView<Upper>().solve(I_p()).rowwise().norm();
96 return;
97 }
98 MatrixXd Rinv(PQR.matrixQR().topLeftCorner(m_r, m_r).
99 triangularView<Upper>().
100 solve(MatrixXd::Identity(m_r, m_r)));
101 VectorXd effects(PQR.householderQ().adjoint() * y);
102 m_coef.head(m_r) = Rinv * effects.head(m_r);
103 m_coef = Pmat * m_coef;
104 // create fitted values from effects
105 // (can't use X*m_coef if X is rank-deficient)
106 effects.tail(m_n - m_r).setZero();
107 m_fitted = PQR.householderQ() * effects;
108 m_se.head(m_r) = Rinv.rowwise().norm();
109 m_se = Pmat * m_se;
110 }
111
QR(const Map<MatrixXd> & X,const Map<VectorXd> & y)112 QR::QR(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
113 HouseholderQR<MatrixXd> QR(X);
114 m_coef = QR.solve(y);
115 m_fitted = X * m_coef;
116 m_se = QR.matrixQR().topRows(m_p).
117 triangularView<Upper>().solve(I_p()).rowwise().norm();
118 }
119
120
Llt(const Map<MatrixXd> & X,const Map<VectorXd> & y)121 Llt::Llt(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
122 LLT<MatrixXd> Ch(XtX().selfadjointView<Lower>());
123 m_coef = Ch.solve(X.adjoint() * y);
124 m_fitted = X * m_coef;
125 m_se = Ch.matrixL().solve(I_p()).colwise().norm();
126 }
127
Ldlt(const Map<MatrixXd> & X,const Map<VectorXd> & y)128 Ldlt::Ldlt(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
129 LDLT<MatrixXd> Ch(XtX().selfadjointView<Lower>());
130 Dplus(Ch.vectorD()); // to set the rank
131 //FIXME: Check on the permutation in the LDLT and incorporate it in
132 //the coefficients and the standard error computation.
133 // m_coef = Ch.matrixL().adjoint().
134 // solve(Dplus(D) * Ch.matrixL().solve(X.adjoint() * y));
135 m_coef = Ch.solve(X.adjoint() * y);
136 m_fitted = X * m_coef;
137 m_se = Ch.solve(I_p()).diagonal().array().sqrt();
138 }
139
gesdd(MatrixXd & A,ArrayXd & S,MatrixXd & Vt)140 int gesdd(MatrixXd& A, ArrayXd& S, MatrixXd& Vt) {
141 int info, mone = -1, m = A.rows(), n = A.cols();
142 std::vector<int> iwork(8 * n);
143 double wrk;
144 if (m < n || S.size() != n || Vt.rows() != n || Vt.cols() != n)
145 throw std::invalid_argument("dimension mismatch in gesvd");
146 F77_CALL(dgesdd)("O", &m, &n, A.data(), &m, S.data(), A.data(),
147 &m, Vt.data(), &n, &wrk, &mone, &iwork[0], &info);
148 int lwork(wrk);
149 std::vector<double> work(lwork);
150 F77_CALL(dgesdd)("O", &m, &n, A.data(), &m, S.data(), A.data(),
151 &m, Vt.data(), &n, &work[0], &lwork, &iwork[0], &info);
152 return info;
153 }
154
GESDD(const Map<MatrixXd> & X,const Map<VectorXd> & y)155 GESDD::GESDD(const Map<MatrixXd>& X, const Map<VectorXd> &y) : lm(X, y) {
156 MatrixXd U(X), Vt(m_p, m_p);
157 ArrayXd S(m_p);
158 if (gesdd(U, S, Vt)) throw std::runtime_error("error in gesdd");
159 MatrixXd VDi(Vt.adjoint() * Dplus(S).matrix().asDiagonal());
160 m_coef = VDi * U.adjoint() * y;
161 m_fitted = X * m_coef;
162 m_se = VDi.rowwise().norm();
163 }
164
SVD(const Map<MatrixXd> & X,const Map<VectorXd> & y)165 SVD::SVD(const Map<MatrixXd> &X, const Map<VectorXd> &y) : lm(X, y) {
166 JacobiSVD<MatrixXd> UDV(X.jacobiSvd(ComputeThinU|ComputeThinV));
167 MatrixXd VDi(UDV.matrixV() *
168 Dplus(UDV.singularValues().array()).matrix().asDiagonal());
169 m_coef = VDi * UDV.matrixU().adjoint() * y;
170 m_fitted = X * m_coef;
171 m_se = VDi.rowwise().norm();
172 }
173
SymmEigen(const Map<MatrixXd> & X,const Map<VectorXd> & y)174 SymmEigen::SymmEigen(const Map<MatrixXd> &X, const Map<VectorXd> &y)
175 : lm(X, y) {
176 SelfAdjointEigenSolver<MatrixXd> eig(XtX().selfadjointView<Lower>());
177 MatrixXd VDi(eig.eigenvectors() *
178 Dplus(eig.eigenvalues().array()).sqrt().matrix().asDiagonal());
179 m_coef = VDi * VDi.adjoint() * X.adjoint() * y;
180 m_fitted = X * m_coef;
181 m_se = VDi.rowwise().norm();
182 }
183
184 enum {ColPivQR_t = 0, QR_t, LLT_t, LDLT_t, SVD_t, SymmEigen_t, GESDD_t};
185
do_lm(const Map<MatrixXd> & X,const Map<VectorXd> & y,int type)186 static inline lm do_lm(const Map<MatrixXd> &X, const Map<VectorXd> &y, int type) {
187 switch(type) {
188 case ColPivQR_t:
189 return ColPivQR(X, y);
190 case QR_t:
191 return QR(X, y);
192 case LLT_t:
193 return Llt(X, y);
194 case LDLT_t:
195 return Ldlt(X, y);
196 case SVD_t:
197 return SVD(X, y);
198 case SymmEigen_t:
199 return SymmEigen(X, y);
200 case GESDD_t:
201 return GESDD(X, y);
202 }
203 throw invalid_argument("invalid type");
204 return ColPivQR(X, y); // -Wall
205 }
206
fastLm(Rcpp::NumericMatrix Xs,Rcpp::NumericVector ys,int type)207 List fastLm(Rcpp::NumericMatrix Xs, Rcpp::NumericVector ys, int type) {
208 const Map<MatrixXd> X(as<Map<MatrixXd> >(Xs));
209 const Map<VectorXd> y(as<Map<VectorXd> >(ys));
210 Index n = X.rows();
211 if ((Index)y.size() != n) throw invalid_argument("size mismatch");
212
213 // Select and apply the least squares method
214 lm ans(do_lm(X, y, type));
215
216 // Copy coefficients and install names, if any
217 NumericVector coef(wrap(ans.coef()));
218
219 List dimnames(NumericMatrix(Xs).attr("dimnames"));
220 if (dimnames.size() > 1) {
221 RObject colnames = dimnames[1];
222 if (!(colnames).isNULL())
223 coef.attr("names") = clone(CharacterVector(colnames));
224 }
225
226 VectorXd resid = y - ans.fitted();
227 int rank = ans.rank();
228 int df = (rank == ::NA_INTEGER) ? n - X.cols() : n - rank;
229 double s = resid.norm() / std::sqrt(double(df));
230 // Create the standard errors
231 VectorXd se = s * ans.se();
232
233 return List::create(_["coefficients"] = coef,
234 _["se"] = se,
235 _["rank"] = rank,
236 _["df.residual"] = df,
237 _["residuals"] = resid,
238 _["s"] = s,
239 _["fitted.values"] = ans.fitted());
240 }
241 }
242
243 // This defines the R-callable function 'fastLm'
244 // [[Rcpp::export]]
fastLm_Impl(Rcpp::NumericMatrix X,Rcpp::NumericVector y,int type)245 Rcpp::List fastLm_Impl(Rcpp::NumericMatrix X, Rcpp::NumericVector y, int type) {
246 return lmsol::fastLm(X, y, type);
247 }
248
249