/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.regression;

import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;
import org.apache.commons.math3.util.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
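 *
 * <p>A minimal usage sketch (the data values are illustrative only):</p>
 * <pre>{@code
 * // two regressors plus an automatically added constant
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[]{1.0,  2.0},  7.9);
 * regression.addObservation(new double[]{2.0,  0.5},  5.1);
 * regression.addObservation(new double[]{3.0, -1.0},  2.2);
 * regression.addObservation(new double[]{4.0,  3.0}, 13.0);
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates();
 * }</pre>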
 *
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of the R'y vector */
    private final double[] rhs;
    /** the off-diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs = 0;
    /** sum of squared errors of largest regression */
    private double sserr = 0.0;
    /** have the residual sums of squares been computed? */
    private boolean rss_set = false;
    /** has the tolerance setting method been called? */
    private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy = 0.0;
    /** summation of squared Y values */
    private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
    private boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;
    /**
     *  Set the default constructor to private access
     *  to prevent inadvertent instantiation
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
    throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
    throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression includes an intercept, false otherwise
     */
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    public void addObservation(final double[] x, final double y)
    throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
               (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(MathArrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;

    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if ((x == null) || (y == null) || (x.length != y.length)) {
            throw new ModelSpecificationException(
                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                  (x == null) ? 0 : x.length,
                  (y == null) ? 0 : y.length);
        }
        if (x.length == 0) {  // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in the Miller paper, in the original implementation the x vector
     * is overwritten. In this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes dense
     * design matrices; the original Gentleman algorithm retains some advantage for
     * sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation (-1, 1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
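        // Gentleman-style update: sweep the new observation through each row of
        // the decomposition, applying a square-root-free planar rotation that
        // annihilates x[i] while updating d, r and rhs in place.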
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers a and b such that the contamination due to the
     * numerical smallness of one addend does not corrupt the sum.
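     * <p>For example, {@code smartAdd(1.0e16, 1.0)} returns {@code 1.0e16}:
     * the smaller addend lies below the larger addend's relative epsilon and
     * would contribute only rounding noise.</p>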
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double _a = FastMath.abs(a);
        final double _b = FastMath.abs(b);
        if (_a > _b) {
            final double eps = _a * Precision.EPSILON;
            if (_b > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = _b * Precision.EPSILON;
            if (_a > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and restores the
     * canonical regressor order.
     */
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.work_tolset, 0.0);
        Arrays.fill(this.work_sing, 0.0);
        Arrays.fill(this.x_sing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rss_set = false;
        this.tol_set = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
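        // tol[col] is eps times an accumulated bound on the magnitude of
        // column col of the scaled R factor; it is used later as the
        // near-zero threshold in the singularity checks.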
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = work_tolset[col];
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
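        // Back-substitution on the triangular system: work from the last
        // requested regressor upwards, zeroing any coefficient whose scaled
        // diagonal falls below its tolerance.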
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] ={
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rss_set = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
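     *
     * <p>Entry (row, col) of the packed result, with {@code row >= col}, is
     * found at index {@code row * (row + 1) / 2 + col}.</p>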
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
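        // Work backwards from the last row: each element of the inverse of the
        // unit upper-triangular R is a running dot product of a row of R with
        // previously computed elements of the inverse.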
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi and Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
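     *
     * <p>In this implementation the correlations with the regressand occupy the
     * last {@code nvars - in} entries of the returned array, following the
     * {@code (nvars - in) * (nvars - in - 1) / 2} regressor-regressor
     * correlations.</p>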
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        // validate the argument before any allocation or indexing depends on it
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
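        // Flag standing in for the "go to 40" jump of the original Fortran
        // routine: when set, the plane-rotation step below is skipped for the
        // current move.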
        boolean bSkipTo40 = false;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            bSkipTo40 = false; // reset the skip flag for each single-position move
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] /= X;
                    bSkipTo40 = true;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 on error, 0 otherwise
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix also known as the leverage matrix.
     *
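     * <p>A hypothetical usage sketch, for a model that was built with an
     * intercept and two regressors (the constant is added internally and must
     * not be supplied here):</p>
     * <pre>{@code
     * double leverage = regression.getDiagonalOfHatMatrix(new double[]{2.0, 0.5});
     * }</pre>
     *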
     * @param row_data the regressor values of the observation of interest
     * @return the diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling {@link #regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return MathArrays.copyOf(vorder);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables
     */
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables or the number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the given array.
     * Calling this method will change the internal order of the regressors,
     * and care is required in interpreting the hat matrix.
     *
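     * <p>A hypothetical call, keeping only regressors 0 and 2 of a model with
     * three regressors (indices refer to positions in the canonical order):</p>
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[]{0, 2});
     * }</pre>
     *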
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model, or a regressor index in the
     * regressor array does not exist
     */
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            // validate the requested index, not the loop counter
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}