1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math3.stat.correlation; 18 19 import org.apache.commons.math3.exception.MathIllegalArgumentException; 20 import org.apache.commons.math3.exception.NotStrictlyPositiveException; 21 import org.apache.commons.math3.exception.util.LocalizedFormats; 22 import org.apache.commons.math3.linear.RealMatrix; 23 import org.apache.commons.math3.linear.BlockRealMatrix; 24 import org.apache.commons.math3.stat.descriptive.moment.Mean; 25 import org.apache.commons.math3.stat.descriptive.moment.Variance; 26 27 /** 28 * Computes covariances for pairs of arrays or columns of a matrix. 29 * 30 * <p>The constructors that take <code>RealMatrix</code> or 31 * <code>double[][]</code> arguments generate covariance matrices. The 32 * columns of the input matrices are assumed to represent variable values.</p> 33 * 34 * <p>The constructor argument <code>biasCorrected</code> determines whether or 35 * not computed covariances are bias-corrected.</p> 36 * 37 * <p>Unbiased covariances are given by the formula</p> 38 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> 39 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code> 40 * is the mean of the <code>Y</code> values. 41 * 42 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code> 43 * 44 * @since 2.0 45 */ 46 public class Covariance { 47 48 /** covariance matrix */ 49 private final RealMatrix covarianceMatrix; 50 51 /** 52 * Create an empty covariance matrix. 53 */ 54 /** Number of observations (length of covariate vectors) */ 55 private final int n; 56 57 /** 58 * Create a Covariance with no data 59 */ Covariance()60 public Covariance() { 61 super(); 62 covarianceMatrix = null; 63 n = 0; 64 } 65 66 /** 67 * Create a Covariance matrix from a rectangular array 68 * whose columns represent covariates. 69 * 70 * <p>The <code>biasCorrected</code> parameter determines whether or not 71 * covariance estimates are bias-corrected.</p> 72 * 73 * <p>The input array must be rectangular with at least one column 74 * and two rows.</p> 75 * 76 * @param data rectangular array with columns representing covariates 77 * @param biasCorrected true means covariances are bias-corrected 78 * @throws MathIllegalArgumentException if the input data array is not 79 * rectangular with at least two rows and one column. 80 * @throws NotStrictlyPositiveException if the input data array is not 81 * rectangular with at least one row and one column. 82 */ Covariance(double[][] data, boolean biasCorrected)83 public Covariance(double[][] data, boolean biasCorrected) 84 throws MathIllegalArgumentException, NotStrictlyPositiveException { 85 this(new BlockRealMatrix(data), biasCorrected); 86 } 87 88 /** 89 * Create a Covariance matrix from a rectangular array 90 * whose columns represent covariates. 91 * 92 * <p>The input array must be rectangular with at least one column 93 * and two rows</p> 94 * 95 * @param data rectangular array with columns representing covariates 96 * @throws MathIllegalArgumentException if the input data array is not 97 * rectangular with at least two rows and one column. 98 * @throws NotStrictlyPositiveException if the input data array is not 99 * rectangular with at least one row and one column. 100 */ Covariance(double[][] data)101 public Covariance(double[][] data) 102 throws MathIllegalArgumentException, NotStrictlyPositiveException { 103 this(data, true); 104 } 105 106 /** 107 * Create a covariance matrix from a matrix whose columns 108 * represent covariates. 109 * 110 * <p>The <code>biasCorrected</code> parameter determines whether or not 111 * covariance estimates are bias-corrected.</p> 112 * 113 * <p>The matrix must have at least one column and two rows</p> 114 * 115 * @param matrix matrix with columns representing covariates 116 * @param biasCorrected true means covariances are bias-corrected 117 * @throws MathIllegalArgumentException if the input matrix does not have 118 * at least two rows and one column 119 */ Covariance(RealMatrix matrix, boolean biasCorrected)120 public Covariance(RealMatrix matrix, boolean biasCorrected) 121 throws MathIllegalArgumentException { 122 checkSufficientData(matrix); 123 n = matrix.getRowDimension(); 124 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); 125 } 126 127 /** 128 * Create a covariance matrix from a matrix whose columns 129 * represent covariates. 130 * 131 * <p>The matrix must have at least one column and two rows</p> 132 * 133 * @param matrix matrix with columns representing covariates 134 * @throws MathIllegalArgumentException if the input matrix does not have 135 * at least two rows and one column 136 */ Covariance(RealMatrix matrix)137 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException { 138 this(matrix, true); 139 } 140 141 /** 142 * Returns the covariance matrix 143 * 144 * @return covariance matrix 145 */ getCovarianceMatrix()146 public RealMatrix getCovarianceMatrix() { 147 return covarianceMatrix; 148 } 149 150 /** 151 * Returns the number of observations (length of covariate vectors) 152 * 153 * @return number of observations 154 */ getN()155 public int getN() { 156 return n; 157 } 158 159 /** 160 * Compute a covariance matrix from a matrix whose columns represent 161 * covariates. 162 * @param matrix input matrix (must have at least one column and two rows) 163 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 164 * @return covariance matrix 165 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 166 */ computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)167 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) 168 throws MathIllegalArgumentException { 169 int dimension = matrix.getColumnDimension(); 170 Variance variance = new Variance(biasCorrected); 171 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); 172 for (int i = 0; i < dimension; i++) { 173 for (int j = 0; j < i; j++) { 174 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); 175 outMatrix.setEntry(i, j, cov); 176 outMatrix.setEntry(j, i, cov); 177 } 178 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); 179 } 180 return outMatrix; 181 } 182 183 /** 184 * Create a covariance matrix from a matrix whose columns represent 185 * covariates. Covariances are computed using the bias-corrected formula. 186 * @param matrix input matrix (must have at least one column and two rows) 187 * @return covariance matrix 188 * @throws MathIllegalArgumentException if matrix does not contain sufficient data 189 * @see #Covariance 190 */ computeCovarianceMatrix(RealMatrix matrix)191 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) 192 throws MathIllegalArgumentException { 193 return computeCovarianceMatrix(matrix, true); 194 } 195 196 /** 197 * Compute a covariance matrix from a rectangular array whose columns represent 198 * covariates. 199 * @param data input array (must have at least one column and two rows) 200 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 201 * @return covariance matrix 202 * @throws MathIllegalArgumentException if the data array does not contain sufficient 203 * data 204 * @throws NotStrictlyPositiveException if the input data array is not 205 * rectangular with at least one row and one column. 206 */ computeCovarianceMatrix(double[][] data, boolean biasCorrected)207 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) 208 throws MathIllegalArgumentException, NotStrictlyPositiveException { 209 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); 210 } 211 212 /** 213 * Create a covariance matrix from a rectangular array whose columns represent 214 * covariates. Covariances are computed using the bias-corrected formula. 215 * @param data input array (must have at least one column and two rows) 216 * @return covariance matrix 217 * @throws MathIllegalArgumentException if the data array does not contain sufficient data 218 * @throws NotStrictlyPositiveException if the input data array is not 219 * rectangular with at least one row and one column. 220 * @see #Covariance 221 */ computeCovarianceMatrix(double[][] data)222 protected RealMatrix computeCovarianceMatrix(double[][] data) 223 throws MathIllegalArgumentException, NotStrictlyPositiveException { 224 return computeCovarianceMatrix(data, true); 225 } 226 227 /** 228 * Computes the covariance between the two arrays. 229 * 230 * <p>Array lengths must match and the common length must be at least 2.</p> 231 * 232 * @param xArray first data array 233 * @param yArray second data array 234 * @param biasCorrected if true, returned value will be bias-corrected 235 * @return returns the covariance for the two arrays 236 * @throws MathIllegalArgumentException if the arrays lengths do not match or 237 * there is insufficient data 238 */ covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)239 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 240 throws MathIllegalArgumentException { 241 Mean mean = new Mean(); 242 double result = 0d; 243 int length = xArray.length; 244 if (length != yArray.length) { 245 throw new MathIllegalArgumentException( 246 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length); 247 } else if (length < 2) { 248 throw new MathIllegalArgumentException( 249 LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2); 250 } else { 251 double xMean = mean.evaluate(xArray); 252 double yMean = mean.evaluate(yArray); 253 for (int i = 0; i < length; i++) { 254 double xDev = xArray[i] - xMean; 255 double yDev = yArray[i] - yMean; 256 result += (xDev * yDev - result) / (i + 1); 257 } 258 } 259 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; 260 } 261 262 /** 263 * Computes the covariance between the two arrays, using the bias-corrected 264 * formula. 265 * 266 * <p>Array lengths must match and the common length must be at least 2.</p> 267 * 268 * @param xArray first data array 269 * @param yArray second data array 270 * @return returns the covariance for the two arrays 271 * @throws MathIllegalArgumentException if the arrays lengths do not match or 272 * there is insufficient data 273 */ covariance(final double[] xArray, final double[] yArray)274 public double covariance(final double[] xArray, final double[] yArray) 275 throws MathIllegalArgumentException { 276 return covariance(xArray, yArray, true); 277 } 278 279 /** 280 * Throws MathIllegalArgumentException if the matrix does not have at least 281 * one column and two rows. 282 * @param matrix matrix to check 283 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 284 * to compute covariance 285 */ checkSufficientData(final RealMatrix matrix)286 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException { 287 int nRows = matrix.getRowDimension(); 288 int nCols = matrix.getColumnDimension(); 289 if (nRows < 2 || nCols < 1) { 290 throw new MathIllegalArgumentException( 291 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, 292 nRows, nCols); 293 } 294 } 295 } 296