1// Copyright ©2014 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package stat
6
7import (
8	"math"
9
10	"gonum.org/v1/gonum/floats"
11	"gonum.org/v1/gonum/mat"
12)
13
14// CovarianceMatrix calculates the covariance matrix (also known as the
15// variance-covariance matrix) calculated from a matrix of data, x, using
16// a two-pass algorithm. The result is stored in dst.
17//
18// If weights is not nil the weighted covariance of x is calculated. weights
19// must have length equal to the number of rows in input data matrix and
20// must not contain negative elements.
21// The dst matrix must either be empty or have the same number of
22// columns as the input data matrix.
23func CovarianceMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) {
24	// This is the matrix version of the two-pass algorithm. It doesn't use the
25	// additional floating point error correction that the Covariance function uses
26	// to reduce the impact of rounding during centering.
27
28	r, c := x.Dims()
29
30	if dst.IsEmpty() {
31		*dst = *(dst.GrowSym(c).(*mat.SymDense))
32	} else if n := dst.Symmetric(); n != c {
33		panic(mat.ErrShape)
34	}
35
36	var xt mat.Dense
37	xt.CloneFrom(x.T())
38	// Subtract the mean of each of the columns.
39	for i := 0; i < c; i++ {
40		v := xt.RawRowView(i)
41		// This will panic with ErrShape if len(weights) != len(v), so
42		// we don't have to check the size later.
43		mean := Mean(v, weights)
44		floats.AddConst(-mean, v)
45	}
46
47	if weights == nil {
48		// Calculate the normalization factor
49		// scaled by the sample size.
50		dst.SymOuterK(1/(float64(r)-1), &xt)
51		return
52	}
53
54	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
55	sqrtwts := make([]float64, r)
56	for i, w := range weights {
57		if w < 0 {
58			panic("stat: negative covariance matrix weights")
59		}
60		sqrtwts[i] = math.Sqrt(w)
61	}
62	// Weight the rows.
63	for i := 0; i < c; i++ {
64		v := xt.RawRowView(i)
65		floats.Mul(v, sqrtwts)
66	}
67
68	// Calculate the normalization factor
69	// scaled by the weighted sample size.
70	dst.SymOuterK(1/(floats.Sum(weights)-1), &xt)
71}
72
73// CorrelationMatrix returns the correlation matrix calculated from a matrix
74// of data, x, using a two-pass algorithm. The result is stored in dst.
75//
76// If weights is not nil the weighted correlation of x is calculated. weights
77// must have length equal to the number of rows in input data matrix and
78// must not contain negative elements.
79// The dst matrix must either be empty or have the same number of
80// columns as the input data matrix.
81func CorrelationMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) {
82	// This will panic if the sizes don't match, or if weights is the wrong size.
83	CovarianceMatrix(dst, x, weights)
84	covToCorr(dst)
85}
86
87// covToCorr converts a covariance matrix to a correlation matrix.
88func covToCorr(c *mat.SymDense) {
89	r := c.Symmetric()
90
91	s := make([]float64, r)
92	for i := 0; i < r; i++ {
93		s[i] = 1 / math.Sqrt(c.At(i, i))
94	}
95	for i, sx := range s {
96		// Ensure that the diagonal has exactly ones.
97		c.SetSym(i, i, 1)
98		for j := i + 1; j < r; j++ {
99			v := c.At(i, j)
100			c.SetSym(i, j, v*sx*s[j])
101		}
102	}
103}
104
105// corrToCov converts a correlation matrix to a covariance matrix.
106// The input sigma should be vector of standard deviations corresponding
107// to the covariance.  It will panic if len(sigma) is not equal to the
108// number of rows in the correlation matrix.
109func corrToCov(c *mat.SymDense, sigma []float64) {
110	r, _ := c.Dims()
111
112	if r != len(sigma) {
113		panic(mat.ErrShape)
114	}
115	for i, sx := range sigma {
116		// Ensure that the diagonal has exactly sigma squared.
117		c.SetSym(i, i, sx*sx)
118		for j := i + 1; j < r; j++ {
119			v := c.At(i, j)
120			c.SetSym(i, j, v*sx*sigma[j])
121		}
122	}
123}
124
125// Mahalanobis computes the Mahalanobis distance
126//  D = sqrt((x-y)ᵀ * Σ^-1 * (x-y))
127// between the column vectors x and y given the cholesky decomposition of Σ.
128// Mahalanobis returns NaN if the linear solve fails.
129//
130// See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information.
131func Mahalanobis(x, y mat.Vector, chol *mat.Cholesky) float64 {
132	var diff mat.VecDense
133	diff.SubVec(x, y)
134	var tmp mat.VecDense
135	err := chol.SolveVecTo(&tmp, &diff)
136	if err != nil {
137		return math.NaN()
138	}
139	return math.Sqrt(mat.Dot(&tmp, &diff))
140}
141