1// Copyright ©2014 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package stat
6
7import (
8	"math"
9	"testing"
10
11	"golang.org/x/exp/rand"
12
13	"gonum.org/v1/gonum/floats"
14	"gonum.org/v1/gonum/mat"
15)
16
17func TestCovarianceMatrix(t *testing.T) {
18	const tol = 1e-15
19
20	// An alternative way to test this is to call the Variance and
21	// Covariance functions and ensure that the results are identical.
22	for i, test := range []struct {
23		data    *mat.Dense
24		weights []float64
25		ans     *mat.Dense
26	}{
27		{
28			data: mat.NewDense(5, 2, []float64{
29				-2, -4,
30				-1, 2,
31				0, 0,
32				1, -2,
33				2, 4,
34			}),
35			weights: nil,
36			ans: mat.NewDense(2, 2, []float64{
37				2.5, 3,
38				3, 10,
39			}),
40		}, {
41			data: mat.NewDense(3, 2, []float64{
42				1, 1,
43				2, 4,
44				3, 9,
45			}),
46			weights: []float64{
47				1,
48				1.5,
49				1,
50			},
51			ans: mat.NewDense(2, 2, []float64{
52				0.8, 3.2,
53				3.2, 13.142857142857146,
54			}),
55		},
56	} {
57		// Make a copy of the data to check that it isn't changing.
58		r := test.data.RawMatrix()
59		d := make([]float64, len(r.Data))
60		copy(d, r.Data)
61
62		w := make([]float64, len(test.weights))
63		if test.weights != nil {
64			copy(w, test.weights)
65		}
66
67		var cov mat.SymDense
68		CovarianceMatrix(&cov, test.data, test.weights)
69		if !mat.EqualApprox(&cov, test.ans, tol) {
70			t.Errorf("%d: expected cov %v, found %v", i, test.ans, &cov)
71		}
72		if !floats.Equal(d, r.Data) {
73			t.Errorf("%d: data was modified during execution", i)
74		}
75		if !floats.Equal(w, test.weights) {
76			t.Errorf("%d: weights was modified during execution", i)
77		}
78
79		// compare with call to Covariance
80		_, cols := cov.Dims()
81		for ci := 0; ci < cols; ci++ {
82			for cj := 0; cj < cols; cj++ {
83				x := mat.Col(nil, ci, test.data)
84				y := mat.Col(nil, cj, test.data)
85				wc := Covariance(x, y, test.weights)
86				if math.Abs(wc-cov.At(ci, cj)) > 1e-14 {
87					t.Errorf("CovMat does not match at (%v, %v). Want %v, got %v.", ci, cj, wc, cov.At(ci, cj))
88				}
89			}
90		}
91	}
92
93	if !panics(func() { CovarianceMatrix(nil, mat.NewDense(5, 2, nil), []float64{}) }) {
94		t.Errorf("CovarianceMatrix did not panic with weight size mismatch")
95	}
96	if !panics(func() { CovarianceMatrix(mat.NewSymDense(1, nil), mat.NewDense(5, 2, nil), nil) }) {
97		t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch")
98	}
99	if !panics(func() { CovarianceMatrix(nil, mat.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
100		t.Errorf("CovarianceMatrix did not panic with negative weights")
101	}
102	if panics(func() {
103		dst := mat.NewSymDense(4, nil)
104		dst.Reset()
105		CovarianceMatrix(dst, mat.NewDense(2, 2, []float64{1, 2, 3, 4}), nil)
106	}) {
107		t.Errorf("CovarianceMatrix panics with reset destination")
108	}
109}
110
111func TestCorrelationMatrix(t *testing.T) {
112	for i, test := range []struct {
113		data    *mat.Dense
114		weights []float64
115		ans     *mat.Dense
116	}{
117		{
118			data: mat.NewDense(3, 3, []float64{
119				1, 2, 3,
120				3, 4, 5,
121				5, 6, 7,
122			}),
123			weights: nil,
124			ans: mat.NewDense(3, 3, []float64{
125				1, 1, 1,
126				1, 1, 1,
127				1, 1, 1,
128			}),
129		},
130		{
131			data: mat.NewDense(5, 2, []float64{
132				-2, -4,
133				-1, 2,
134				0, 0,
135				1, -2,
136				2, 4,
137			}),
138			weights: nil,
139			ans: mat.NewDense(2, 2, []float64{
140				1, 0.6,
141				0.6, 1,
142			}),
143		}, {
144			data: mat.NewDense(3, 2, []float64{
145				1, 1,
146				2, 4,
147				3, 9,
148			}),
149			weights: []float64{
150				1,
151				1.5,
152				1,
153			},
154			ans: mat.NewDense(2, 2, []float64{
155				1, 0.9868703275903379,
156				0.9868703275903379, 1,
157			}),
158		},
159	} {
160		// Make a copy of the data to check that it isn't changing.
161		r := test.data.RawMatrix()
162		d := make([]float64, len(r.Data))
163		copy(d, r.Data)
164
165		w := make([]float64, len(test.weights))
166		if test.weights != nil {
167			copy(w, test.weights)
168		}
169
170		var corr mat.SymDense
171		CorrelationMatrix(&corr, test.data, test.weights)
172		if !mat.Equal(&corr, test.ans) {
173			t.Errorf("%d: expected corr %v, found %v", i, test.ans, &corr)
174		}
175		if !floats.Equal(d, r.Data) {
176			t.Errorf("%d: data was modified during execution", i)
177		}
178		if !floats.Equal(w, test.weights) {
179			t.Errorf("%d: weights was modified during execution", i)
180		}
181
182		// compare with call to Covariance
183		_, cols := corr.Dims()
184		for ci := 0; ci < cols; ci++ {
185			for cj := 0; cj < cols; cj++ {
186				x := mat.Col(nil, ci, test.data)
187				y := mat.Col(nil, cj, test.data)
188				wc := Correlation(x, y, test.weights)
189				if math.Abs(wc-corr.At(ci, cj)) > 1e-14 {
190					t.Errorf("CorrMat does not match at (%v, %v). Want %v, got %v.", ci, cj, wc, corr.At(ci, cj))
191				}
192			}
193		}
194
195	}
196	if !panics(func() { CorrelationMatrix(nil, mat.NewDense(5, 2, nil), []float64{}) }) {
197		t.Errorf("CorrelationMatrix did not panic with weight size mismatch")
198	}
199	if !panics(func() { CorrelationMatrix(mat.NewSymDense(1, nil), mat.NewDense(5, 2, nil), nil) }) {
200		t.Errorf("CorrelationMatrix did not panic with preallocation size mismatch")
201	}
202	if !panics(func() { CorrelationMatrix(nil, mat.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
203		t.Errorf("CorrelationMatrix did not panic with negative weights")
204	}
205}
206
207func TestCorrCov(t *testing.T) {
208	// test both Cov2Corr and Cov2Corr
209	for i, test := range []struct {
210		data    *mat.Dense
211		weights []float64
212	}{
213		{
214			data: mat.NewDense(3, 3, []float64{
215				1, 2, 3,
216				3, 4, 5,
217				5, 6, 7,
218			}),
219			weights: nil,
220		},
221		{
222			data: mat.NewDense(5, 2, []float64{
223				-2, -4,
224				-1, 2,
225				0, 0,
226				1, -2,
227				2, 4,
228			}),
229			weights: nil,
230		}, {
231			data: mat.NewDense(3, 2, []float64{
232				1, 1,
233				2, 4,
234				3, 9,
235			}),
236			weights: []float64{
237				1,
238				1.5,
239				1,
240			},
241		},
242	} {
243		var corr, cov mat.SymDense
244		CorrelationMatrix(&corr, test.data, test.weights)
245		CovarianceMatrix(&cov, test.data, test.weights)
246
247		r := cov.Symmetric()
248
249		// Get the diagonal elements from cov to determine the sigmas.
250		sigmas := make([]float64, r)
251		for i := range sigmas {
252			sigmas[i] = math.Sqrt(cov.At(i, i))
253		}
254
255		covFromCorr := mat.NewSymDense(corr.Symmetric(), nil)
256		covFromCorr.CopySym(&corr)
257		corrToCov(covFromCorr, sigmas)
258
259		corrFromCov := mat.NewSymDense(cov.Symmetric(), nil)
260		corrFromCov.CopySym(&cov)
261		covToCorr(corrFromCov)
262
263		if !mat.EqualApprox(&corr, corrFromCov, 1e-14) {
264			t.Errorf("%d: corrToCov did not match direct Correlation calculation.  Want: %v, got: %v. ", i, corr, corrFromCov)
265		}
266		if !mat.EqualApprox(&cov, covFromCorr, 1e-14) {
267			t.Errorf("%d: covToCorr did not match direct Covariance calculation.  Want: %v, got: %v. ", i, cov, covFromCorr)
268		}
269
270		if !panics(func() { corrToCov(mat.NewSymDense(2, nil), []float64{}) }) {
271			t.Errorf("CorrelationMatrix did not panic with sigma size mismatch")
272		}
273	}
274}
275
276func TestMahalanobis(t *testing.T) {
277	// Comparison with scipy.
278	for cas, test := range []struct {
279		x, y  *mat.VecDense
280		Sigma *mat.SymDense
281		ans   float64
282	}{
283		{
284			x: mat.NewVecDense(3, []float64{1, 2, 3}),
285			y: mat.NewVecDense(3, []float64{0.8, 1.1, -1}),
286			Sigma: mat.NewSymDense(3,
287				[]float64{
288					0.8, 0.3, 0.1,
289					0.3, 0.7, -0.1,
290					0.1, -0.1, 7}),
291			ans: 1.9251757377680914,
292		},
293	} {
294		var chol mat.Cholesky
295		ok := chol.Factorize(test.Sigma)
296		if !ok {
297			panic("bad test")
298		}
299		ans := Mahalanobis(test.x, test.y, &chol)
300		if math.Abs(ans-test.ans) > 1e-14 {
301			t.Errorf("Cas %d: got %v, want %v", cas, ans, test.ans)
302		}
303	}
304}
305
306// benchmarks
307
308func randMat(r, c int) mat.Matrix {
309	x := make([]float64, r*c)
310	for i := range x {
311		x[i] = rand.Float64()
312	}
313	return mat.NewDense(r, c, x)
314}
315
316func benchmarkCovarianceMatrix(b *testing.B, m mat.Matrix) {
317	var res mat.SymDense
318	b.ResetTimer()
319	for i := 0; i < b.N; i++ {
320		res.Reset()
321		CovarianceMatrix(&res, m, nil)
322	}
323}
324func benchmarkCovarianceMatrixWeighted(b *testing.B, m mat.Matrix) {
325	r, _ := m.Dims()
326	wts := make([]float64, r)
327	for i := range wts {
328		wts[i] = 0.5
329	}
330	var res mat.SymDense
331	b.ResetTimer()
332	for i := 0; i < b.N; i++ {
333		res.Reset()
334		CovarianceMatrix(&res, m, wts)
335	}
336}
337func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat.Matrix) {
338	_, c := m.Dims()
339	res := mat.NewSymDense(c, nil)
340	b.ResetTimer()
341	for i := 0; i < b.N; i++ {
342		CovarianceMatrix(res, m, nil)
343	}
344}
345
346func BenchmarkCovarianceMatrixSmallxSmall(b *testing.B) {
347	// 10 * 10 elements
348	x := randMat(small, small)
349	benchmarkCovarianceMatrix(b, x)
350}
351func BenchmarkCovarianceMatrixSmallxMedium(b *testing.B) {
352	// 10 * 1000 elements
353	x := randMat(small, medium)
354	benchmarkCovarianceMatrix(b, x)
355}
356
357func BenchmarkCovarianceMatrixMediumxSmall(b *testing.B) {
358	// 1000 * 10 elements
359	x := randMat(medium, small)
360	benchmarkCovarianceMatrix(b, x)
361}
362func BenchmarkCovarianceMatrixMediumxMedium(b *testing.B) {
363	// 1000 * 1000 elements
364	x := randMat(medium, medium)
365	benchmarkCovarianceMatrix(b, x)
366}
367
368func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) {
369	// 1e5 * 10 elements
370	x := randMat(large, small)
371	benchmarkCovarianceMatrix(b, x)
372}
373
374func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) {
375	// 1e7 * 10 elements
376	x := randMat(huge, small)
377	benchmarkCovarianceMatrix(b, x)
378}
379
380func BenchmarkCovarianceMatrixSmallxSmallWeighted(b *testing.B) {
381	// 10 * 10 elements
382	x := randMat(small, small)
383	benchmarkCovarianceMatrixWeighted(b, x)
384}
385func BenchmarkCovarianceMatrixSmallxMediumWeighted(b *testing.B) {
386	// 10 * 1000 elements
387	x := randMat(small, medium)
388	benchmarkCovarianceMatrixWeighted(b, x)
389}
390
391func BenchmarkCovarianceMatrixMediumxSmallWeighted(b *testing.B) {
392	// 1000 * 10 elements
393	x := randMat(medium, small)
394	benchmarkCovarianceMatrixWeighted(b, x)
395}
396func BenchmarkCovarianceMatrixMediumxMediumWeighted(b *testing.B) {
397	// 1000 * 1000 elements
398	x := randMat(medium, medium)
399	benchmarkCovarianceMatrixWeighted(b, x)
400}
401
402func BenchmarkCovarianceMatrixLargexSmallWeighted(b *testing.B) {
403	// 1e5 * 10 elements
404	x := randMat(large, small)
405	benchmarkCovarianceMatrixWeighted(b, x)
406}
407
408func BenchmarkCovarianceMatrixHugexSmallWeighted(b *testing.B) {
409	// 1e7 * 10 elements
410	x := randMat(huge, small)
411	benchmarkCovarianceMatrixWeighted(b, x)
412}
413
414func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) {
415	// 10 * 10 elements
416	x := randMat(small, small)
417	benchmarkCovarianceMatrixInPlace(b, x)
418}
419func BenchmarkCovarianceMatrixSmallxMediumInPlace(b *testing.B) {
420	// 10 * 1000 elements
421	x := randMat(small, medium)
422	benchmarkCovarianceMatrixInPlace(b, x)
423}
424
425func BenchmarkCovarianceMatrixMediumxSmallInPlace(b *testing.B) {
426	// 1000 * 10 elements
427	x := randMat(medium, small)
428	benchmarkCovarianceMatrixInPlace(b, x)
429}
430func BenchmarkCovarianceMatrixMediumxMediumInPlace(b *testing.B) {
431	// 1000 * 1000 elements
432	x := randMat(medium, medium)
433	benchmarkCovarianceMatrixInPlace(b, x)
434}
435
436func BenchmarkCovarianceMatrixLargexSmallInPlace(b *testing.B) {
437	// 1e5 * 10 elements
438	x := randMat(large, small)
439	benchmarkCovarianceMatrixInPlace(b, x)
440}
441
442func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) {
443	// 1e7 * 10 elements
444	x := randMat(huge, small)
445	benchmarkCovarianceMatrixInPlace(b, x)
446}
447
448func BenchmarkCovToCorr(b *testing.B) {
449	// generate a 10x10 covariance matrix
450	m := randMat(small, small)
451	var cov mat.SymDense
452	CovarianceMatrix(&cov, m, nil)
453	cc := mat.NewSymDense(cov.Symmetric(), nil)
454	b.ResetTimer()
455	for i := 0; i < b.N; i++ {
456		b.StopTimer()
457		cc.CopySym(&cov)
458		b.StartTimer()
459		covToCorr(cc)
460	}
461}
462
463func BenchmarkCorrToCov(b *testing.B) {
464	// generate a 10x10 correlation matrix
465	m := randMat(small, small)
466	var corr mat.SymDense
467	CorrelationMatrix(&corr, m, nil)
468	cc := mat.NewSymDense(corr.Symmetric(), nil)
469	sigma := make([]float64, small)
470	for i := range sigma {
471		sigma[i] = 2
472	}
473	b.ResetTimer()
474	for i := 0; i < b.N; i++ {
475		b.StopTimer()
476		cc.CopySym(&corr)
477		b.StartTimer()
478		corrToCov(cc, sigma)
479	}
480}
481