1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.math3.stat.correlation;
18 
19 import org.apache.commons.math3.exception.MathIllegalArgumentException;
20 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
21 import org.apache.commons.math3.exception.util.LocalizedFormats;
22 import org.apache.commons.math3.linear.RealMatrix;
23 import org.apache.commons.math3.linear.BlockRealMatrix;
24 import org.apache.commons.math3.stat.descriptive.moment.Mean;
25 import org.apache.commons.math3.stat.descriptive.moment.Variance;
26 
27 /**
28  * Computes covariances for pairs of arrays or columns of a matrix.
29  *
30  * <p>The constructors that take <code>RealMatrix</code> or
31  * <code>double[][]</code> arguments generate covariance matrices.  The
32  * columns of the input matrices are assumed to represent variable values.</p>
33  *
34  * <p>The constructor argument <code>biasCorrected</code> determines whether or
35  * not computed covariances are bias-corrected.</p>
36  *
37  * <p>Unbiased covariances are given by the formula</p>
38  * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
39  * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
40  * is the mean of the <code>Y</code> values.
41  *
42  * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
43  *
44  * @since 2.0
45  */
46 public class Covariance {
47 
48     /** covariance matrix */
49     private final RealMatrix covarianceMatrix;
50 
51     /**
52      * Create an empty covariance matrix.
53      */
54     /** Number of observations (length of covariate vectors) */
55     private final int n;
56 
57     /**
58      * Create a Covariance with no data
59      */
Covariance()60     public Covariance() {
61         super();
62         covarianceMatrix = null;
63         n = 0;
64     }
65 
66     /**
67      * Create a Covariance matrix from a rectangular array
68      * whose columns represent covariates.
69      *
70      * <p>The <code>biasCorrected</code> parameter determines whether or not
71      * covariance estimates are bias-corrected.</p>
72      *
73      * <p>The input array must be rectangular with at least one column
74      * and two rows.</p>
75      *
76      * @param data rectangular array with columns representing covariates
77      * @param biasCorrected true means covariances are bias-corrected
78      * @throws MathIllegalArgumentException if the input data array is not
79      * rectangular with at least two rows and one column.
80      * @throws NotStrictlyPositiveException if the input data array is not
81      * rectangular with at least one row and one column.
82      */
Covariance(double[][] data, boolean biasCorrected)83     public Covariance(double[][] data, boolean biasCorrected)
84     throws MathIllegalArgumentException, NotStrictlyPositiveException {
85         this(new BlockRealMatrix(data), biasCorrected);
86     }
87 
88     /**
89      * Create a Covariance matrix from a rectangular array
90      * whose columns represent covariates.
91      *
92      * <p>The input array must be rectangular with at least one column
93      * and two rows</p>
94      *
95      * @param data rectangular array with columns representing covariates
96      * @throws MathIllegalArgumentException if the input data array is not
97      * rectangular with at least two rows and one column.
98      * @throws NotStrictlyPositiveException if the input data array is not
99      * rectangular with at least one row and one column.
100      */
Covariance(double[][] data)101     public Covariance(double[][] data)
102     throws MathIllegalArgumentException, NotStrictlyPositiveException {
103         this(data, true);
104     }
105 
106     /**
107      * Create a covariance matrix from a matrix whose columns
108      * represent covariates.
109      *
110      * <p>The <code>biasCorrected</code> parameter determines whether or not
111      * covariance estimates are bias-corrected.</p>
112      *
113      * <p>The matrix must have at least one column and two rows</p>
114      *
115      * @param matrix matrix with columns representing covariates
116      * @param biasCorrected true means covariances are bias-corrected
117      * @throws MathIllegalArgumentException if the input matrix does not have
118      * at least two rows and one column
119      */
Covariance(RealMatrix matrix, boolean biasCorrected)120     public Covariance(RealMatrix matrix, boolean biasCorrected)
121     throws MathIllegalArgumentException {
122        checkSufficientData(matrix);
123        n = matrix.getRowDimension();
124        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
125     }
126 
127     /**
128      * Create a covariance matrix from a matrix whose columns
129      * represent covariates.
130      *
131      * <p>The matrix must have at least one column and two rows</p>
132      *
133      * @param matrix matrix with columns representing covariates
134      * @throws MathIllegalArgumentException if the input matrix does not have
135      * at least two rows and one column
136      */
Covariance(RealMatrix matrix)137     public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
138         this(matrix, true);
139     }
140 
141     /**
142      * Returns the covariance matrix
143      *
144      * @return covariance matrix
145      */
getCovarianceMatrix()146     public RealMatrix getCovarianceMatrix() {
147         return covarianceMatrix;
148     }
149 
150     /**
151      * Returns the number of observations (length of covariate vectors)
152      *
153      * @return number of observations
154      */
getN()155     public int getN() {
156         return n;
157     }
158 
159     /**
160      * Compute a covariance matrix from a matrix whose columns represent
161      * covariates.
162      * @param matrix input matrix (must have at least one column and two rows)
163      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
164      * @return covariance matrix
165      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
166      */
computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)167     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
168     throws MathIllegalArgumentException {
169         int dimension = matrix.getColumnDimension();
170         Variance variance = new Variance(biasCorrected);
171         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
172         for (int i = 0; i < dimension; i++) {
173             for (int j = 0; j < i; j++) {
174               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
175               outMatrix.setEntry(i, j, cov);
176               outMatrix.setEntry(j, i, cov);
177             }
178             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
179         }
180         return outMatrix;
181     }
182 
183     /**
184      * Create a covariance matrix from a matrix whose columns represent
185      * covariates. Covariances are computed using the bias-corrected formula.
186      * @param matrix input matrix (must have at least one column and two rows)
187      * @return covariance matrix
188      * @throws MathIllegalArgumentException if matrix does not contain sufficient data
189      * @see #Covariance
190      */
computeCovarianceMatrix(RealMatrix matrix)191     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
192     throws MathIllegalArgumentException {
193         return computeCovarianceMatrix(matrix, true);
194     }
195 
196     /**
197      * Compute a covariance matrix from a rectangular array whose columns represent
198      * covariates.
199      * @param data input array (must have at least one column and two rows)
200      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
201      * @return covariance matrix
202      * @throws MathIllegalArgumentException if the data array does not contain sufficient
203      * data
204      * @throws NotStrictlyPositiveException if the input data array is not
205      * rectangular with at least one row and one column.
206      */
computeCovarianceMatrix(double[][] data, boolean biasCorrected)207     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
208     throws MathIllegalArgumentException, NotStrictlyPositiveException {
209         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
210     }
211 
212     /**
213      * Create a covariance matrix from a rectangular array whose columns represent
214      * covariates. Covariances are computed using the bias-corrected formula.
215      * @param data input array (must have at least one column and two rows)
216      * @return covariance matrix
217      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
218      * @throws NotStrictlyPositiveException if the input data array is not
219      * rectangular with at least one row and one column.
220      * @see #Covariance
221      */
computeCovarianceMatrix(double[][] data)222     protected RealMatrix computeCovarianceMatrix(double[][] data)
223     throws MathIllegalArgumentException, NotStrictlyPositiveException {
224         return computeCovarianceMatrix(data, true);
225     }
226 
227     /**
228      * Computes the covariance between the two arrays.
229      *
230      * <p>Array lengths must match and the common length must be at least 2.</p>
231      *
232      * @param xArray first data array
233      * @param yArray second data array
234      * @param biasCorrected if true, returned value will be bias-corrected
235      * @return returns the covariance for the two arrays
236      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
237      * there is insufficient data
238      */
covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)239     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
240         throws MathIllegalArgumentException {
241         Mean mean = new Mean();
242         double result = 0d;
243         int length = xArray.length;
244         if (length != yArray.length) {
245             throw new MathIllegalArgumentException(
246                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
247         } else if (length < 2) {
248             throw new MathIllegalArgumentException(
249                   LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
250         } else {
251             double xMean = mean.evaluate(xArray);
252             double yMean = mean.evaluate(yArray);
253             for (int i = 0; i < length; i++) {
254                 double xDev = xArray[i] - xMean;
255                 double yDev = yArray[i] - yMean;
256                 result += (xDev * yDev - result) / (i + 1);
257             }
258         }
259         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
260     }
261 
262     /**
263      * Computes the covariance between the two arrays, using the bias-corrected
264      * formula.
265      *
266      * <p>Array lengths must match and the common length must be at least 2.</p>
267      *
268      * @param xArray first data array
269      * @param yArray second data array
270      * @return returns the covariance for the two arrays
271      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
272      * there is insufficient data
273      */
covariance(final double[] xArray, final double[] yArray)274     public double covariance(final double[] xArray, final double[] yArray)
275         throws MathIllegalArgumentException {
276         return covariance(xArray, yArray, true);
277     }
278 
279     /**
280      * Throws MathIllegalArgumentException if the matrix does not have at least
281      * one column and two rows.
282      * @param matrix matrix to check
283      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
284      * to compute covariance
285      */
checkSufficientData(final RealMatrix matrix)286     private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
287         int nRows = matrix.getRowDimension();
288         int nCols = matrix.getColumnDimension();
289         if (nRows < 2 || nCols < 1) {
290             throw new MathIllegalArgumentException(
291                     LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
292                     nRows, nCols);
293         }
294     }
295 }
296