1// Copyright ©2014 The Gonum Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package stat 6 7import ( 8 "math" 9 10 "gonum.org/v1/gonum/floats" 11 "gonum.org/v1/gonum/mat" 12) 13 14// CovarianceMatrix calculates the covariance matrix (also known as the 15// variance-covariance matrix) calculated from a matrix of data, x, using 16// a two-pass algorithm. The result is stored in dst. 17// 18// If weights is not nil the weighted covariance of x is calculated. weights 19// must have length equal to the number of rows in input data matrix and 20// must not contain negative elements. 21// The dst matrix must either be empty or have the same number of 22// columns as the input data matrix. 23func CovarianceMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { 24 // This is the matrix version of the two-pass algorithm. It doesn't use the 25 // additional floating point error correction that the Covariance function uses 26 // to reduce the impact of rounding during centering. 27 28 r, c := x.Dims() 29 30 if dst.IsEmpty() { 31 *dst = *(dst.GrowSym(c).(*mat.SymDense)) 32 } else if n := dst.Symmetric(); n != c { 33 panic(mat.ErrShape) 34 } 35 36 var xt mat.Dense 37 xt.CloneFrom(x.T()) 38 // Subtract the mean of each of the columns. 39 for i := 0; i < c; i++ { 40 v := xt.RawRowView(i) 41 // This will panic with ErrShape if len(weights) != len(v), so 42 // we don't have to check the size later. 43 mean := Mean(v, weights) 44 floats.AddConst(-mean, v) 45 } 46 47 if weights == nil { 48 // Calculate the normalization factor 49 // scaled by the sample size. 50 dst.SymOuterK(1/(float64(r)-1), &xt) 51 return 52 } 53 54 // Multiply by the sqrt of the weights, so that multiplication is symmetric. 55 sqrtwts := make([]float64, r) 56 for i, w := range weights { 57 if w < 0 { 58 panic("stat: negative covariance matrix weights") 59 } 60 sqrtwts[i] = math.Sqrt(w) 61 } 62 // Weight the rows. 63 for i := 0; i < c; i++ { 64 v := xt.RawRowView(i) 65 floats.Mul(v, sqrtwts) 66 } 67 68 // Calculate the normalization factor 69 // scaled by the weighted sample size. 70 dst.SymOuterK(1/(floats.Sum(weights)-1), &xt) 71} 72 73// CorrelationMatrix returns the correlation matrix calculated from a matrix 74// of data, x, using a two-pass algorithm. The result is stored in dst. 75// 76// If weights is not nil the weighted correlation of x is calculated. weights 77// must have length equal to the number of rows in input data matrix and 78// must not contain negative elements. 79// The dst matrix must either be empty or have the same number of 80// columns as the input data matrix. 81func CorrelationMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { 82 // This will panic if the sizes don't match, or if weights is the wrong size. 83 CovarianceMatrix(dst, x, weights) 84 covToCorr(dst) 85} 86 87// covToCorr converts a covariance matrix to a correlation matrix. 88func covToCorr(c *mat.SymDense) { 89 r := c.Symmetric() 90 91 s := make([]float64, r) 92 for i := 0; i < r; i++ { 93 s[i] = 1 / math.Sqrt(c.At(i, i)) 94 } 95 for i, sx := range s { 96 // Ensure that the diagonal has exactly ones. 97 c.SetSym(i, i, 1) 98 for j := i + 1; j < r; j++ { 99 v := c.At(i, j) 100 c.SetSym(i, j, v*sx*s[j]) 101 } 102 } 103} 104 105// corrToCov converts a correlation matrix to a covariance matrix. 106// The input sigma should be vector of standard deviations corresponding 107// to the covariance. It will panic if len(sigma) is not equal to the 108// number of rows in the correlation matrix. 109func corrToCov(c *mat.SymDense, sigma []float64) { 110 r, _ := c.Dims() 111 112 if r != len(sigma) { 113 panic(mat.ErrShape) 114 } 115 for i, sx := range sigma { 116 // Ensure that the diagonal has exactly sigma squared. 117 c.SetSym(i, i, sx*sx) 118 for j := i + 1; j < r; j++ { 119 v := c.At(i, j) 120 c.SetSym(i, j, v*sx*sigma[j]) 121 } 122 } 123} 124 125// Mahalanobis computes the Mahalanobis distance 126// D = sqrt((x-y)ᵀ * Σ^-1 * (x-y)) 127// between the column vectors x and y given the cholesky decomposition of Σ. 128// Mahalanobis returns NaN if the linear solve fails. 129// 130// See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information. 131func Mahalanobis(x, y mat.Vector, chol *mat.Cholesky) float64 { 132 var diff mat.VecDense 133 diff.SubVec(x, y) 134 var tmp mat.VecDense 135 err := chol.SolveVecTo(&tmp, &diff) 136 if err != nil { 137 return math.NaN() 138 } 139 return math.Sqrt(mat.Dot(&tmp, &diff)) 140} 141