1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.math3.stat.correlation;
18 
19 import org.apache.commons.math3.exception.DimensionMismatchException;
20 import org.apache.commons.math3.exception.MathUnsupportedOperationException;
21 import org.apache.commons.math3.exception.NumberIsTooSmallException;
22 import org.apache.commons.math3.linear.MatrixUtils;
23 import org.apache.commons.math3.linear.RealMatrix;
24 
25 /**
26  * Covariance implementation that does not require input data to be
27  * stored in memory. The size of the covariance matrix is specified in the
28  * constructor. Specific elements of the matrix are incrementally updated with
29  * calls to incrementRow() or increment Covariance().
30  *
31  * <p>This class is based on a paper written by Philippe P&eacute;bay:
32  * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
33  * Formulas for Robust, One-Pass Parallel Computation of Covariances and
34  * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
35  * Sandia National Laboratories.</p>
36  *
37  * <p>Note: the underlying covariance matrix is symmetric, thus only the
38  * upper triangular part of the matrix is stored and updated each increment.</p>
39  *
40  * @since 3.0
41  */
42 public class StorelessCovariance extends Covariance {
43 
44     /** the square covariance matrix (upper triangular part) */
45     private StorelessBivariateCovariance[] covMatrix;
46 
47     /** dimension of the square covariance matrix */
48     private int dimension;
49 
50     /**
51      * Create a bias corrected covariance matrix with a given dimension.
52      *
53      * @param dim the dimension of the square covariance matrix
54      */
StorelessCovariance(final int dim)55     public StorelessCovariance(final int dim) {
56         this(dim, true);
57     }
58 
59     /**
60      * Create a covariance matrix with a given number of rows and columns and the
61      * indicated bias correction.
62      *
63      * @param dim the dimension of the covariance matrix
64      * @param biasCorrected if <code>true</code> the covariance estimate is corrected
65      * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
66      * i.e. n in the denominator.
67      */
StorelessCovariance(final int dim, final boolean biasCorrected)68     public StorelessCovariance(final int dim, final boolean biasCorrected) {
69         dimension = dim;
70         covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
71         initializeMatrix(biasCorrected);
72     }
73 
74     /**
75      * Initialize the internal two-dimensional array of
76      * {@link StorelessBivariateCovariance} instances.
77      *
78      * @param biasCorrected if the covariance estimate shall be corrected for bias
79      */
initializeMatrix(final boolean biasCorrected)80     private void initializeMatrix(final boolean biasCorrected) {
81         for(int i = 0; i < dimension; i++){
82             for(int j = 0; j < dimension; j++){
83                 setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
84             }
85         }
86     }
87 
88     /**
89      * Returns the index (i, j) translated into the one-dimensional
90      * array used to store the upper triangular part of the symmetric
91      * covariance matrix.
92      *
93      * @param i the row index
94      * @param j the column index
95      * @return the corresponding index in the matrix array
96      */
indexOf(final int i, final int j)97     private int indexOf(final int i, final int j) {
98         return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
99     }
100 
101     /**
102      * Gets the element at index (i, j) from the covariance matrix
103      * @param i the row index
104      * @param j the column index
105      * @return the {@link StorelessBivariateCovariance} element at the given index
106      */
getElement(final int i, final int j)107     private StorelessBivariateCovariance getElement(final int i, final int j) {
108         return covMatrix[indexOf(i, j)];
109     }
110 
111     /**
112      * Sets the covariance element at index (i, j) in the covariance matrix
113      * @param i the row index
114      * @param j the column index
115      * @param cov the {@link StorelessBivariateCovariance} element to be set
116      */
setElement(final int i, final int j, final StorelessBivariateCovariance cov)117     private void setElement(final int i, final int j,
118                             final StorelessBivariateCovariance cov) {
119         covMatrix[indexOf(i, j)] = cov;
120     }
121 
122     /**
123      * Get the covariance for an individual element of the covariance matrix.
124      *
125      * @param xIndex row index in the covariance matrix
126      * @param yIndex column index in the covariance matrix
127      * @return the covariance of the given element
128      * @throws NumberIsTooSmallException if the number of observations
129      * in the cell is &lt; 2
130      */
getCovariance(final int xIndex, final int yIndex)131     public double getCovariance(final int xIndex,
132                                 final int yIndex)
133         throws NumberIsTooSmallException {
134 
135         return getElement(xIndex, yIndex).getResult();
136 
137     }
138 
139     /**
140      * Increment the covariance matrix with one row of data.
141      *
142      * @param data array representing one row of data.
143      * @throws DimensionMismatchException if the length of <code>rowData</code>
144      * does not match with the covariance matrix
145      */
increment(final double[] data)146     public void increment(final double[] data)
147         throws DimensionMismatchException {
148 
149         int length = data.length;
150         if (length != dimension) {
151             throw new DimensionMismatchException(length, dimension);
152         }
153 
154         // only update the upper triangular part of the covariance matrix
155         // as only these parts are actually stored
156         for (int i = 0; i < length; i++){
157             for (int j = i; j < length; j++){
158                 getElement(i, j).increment(data[i], data[j]);
159             }
160         }
161 
162     }
163 
164     /**
165      * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
166      * with this.  After invoking this method, covariances returned should be close
167      * to what would have been obtained by performing all of the {@link #increment(double[])}
168      * operations in {@code sc} directly on this.
169      *
170      * @param sc externally computed StorelessCovariance to add to this
171      * @throws DimensionMismatchException if the dimension of sc does not match this
172      * @since 3.3
173      */
append(StorelessCovariance sc)174     public void append(StorelessCovariance sc) throws DimensionMismatchException {
175         if (sc.dimension != dimension) {
176             throw new DimensionMismatchException(sc.dimension, dimension);
177         }
178 
179         // only update the upper triangular part of the covariance matrix
180         // as only these parts are actually stored
181         for (int i = 0; i < dimension; i++) {
182             for (int j = i; j < dimension; j++) {
183                 getElement(i, j).append(sc.getElement(i, j));
184             }
185         }
186     }
187 
188     /**
189      * {@inheritDoc}
190      * @throws NumberIsTooSmallException if the number of observations
191      * in a cell is &lt; 2
192      */
193     @Override
getCovarianceMatrix()194     public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
195         return MatrixUtils.createRealMatrix(getData());
196     }
197 
198     /**
199      * Return the covariance matrix as two-dimensional array.
200      *
201      * @return a two-dimensional double array of covariance values
202      * @throws NumberIsTooSmallException if the number of observations
203      * for a cell is &lt; 2
204      */
getData()205     public double[][] getData() throws NumberIsTooSmallException {
206         final double[][] data = new double[dimension][dimension];
207         for (int i = 0; i < dimension; i++) {
208             for (int j = 0; j < dimension; j++) {
209                 data[i][j] = getElement(i, j).getResult();
210             }
211         }
212         return data;
213     }
214 
215     /**
216      * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
217      * since the number of bivariate observations does not have to be the same for different
218      * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
219      *
220      * @return nothing as this implementation always throws a
221      * {@link MathUnsupportedOperationException}
222      * @throws MathUnsupportedOperationException in all cases
223      */
224     @Override
getN()225     public int getN()
226         throws MathUnsupportedOperationException {
227         throw new MathUnsupportedOperationException();
228     }
229 }
230