1// Copyright ©2016 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package stat
6
7import (
8	"math"
9	"testing"
10
11	"gonum.org/v1/gonum/floats/scalar"
12	"gonum.org/v1/gonum/mat"
13)
14
15func TestPrincipalComponents(t *testing.T) {
16	// Threshold for detecting zero variances.
17	const epsilon = 1e-15
18tests:
19	for i, test := range []struct {
20		data     mat.Matrix
21		weights  []float64
22		wantVecs *mat.Dense
23		wantVars []float64
24		epsilon  float64
25	}{
26		// Test results verified using R.
27		{
28			data: mat.NewDense(3, 3, []float64{
29				1, 2, 3,
30				4, 5, 6,
31				7, 8, 9,
32			}),
33			wantVecs: mat.NewDense(3, 3, []float64{
34				0.5773502691896258, 0.8164965809277261, 0,
35				0.577350269189626, -0.4082482904638632, -0.7071067811865476,
36				0.5773502691896258, -0.4082482904638631, 0.7071067811865475,
37			}),
38			wantVars: []float64{27, 0, 0},
39			epsilon:  1e-12,
40		},
41		{ // Truncated iris data.
42			data: mat.NewDense(10, 4, []float64{
43				5.1, 3.5, 1.4, 0.2,
44				4.9, 3.0, 1.4, 0.2,
45				4.7, 3.2, 1.3, 0.2,
46				4.6, 3.1, 1.5, 0.2,
47				5.0, 3.6, 1.4, 0.2,
48				5.4, 3.9, 1.7, 0.4,
49				4.6, 3.4, 1.4, 0.3,
50				5.0, 3.4, 1.5, 0.2,
51				4.4, 2.9, 1.4, 0.2,
52				4.9, 3.1, 1.5, 0.1,
53			}),
54			wantVecs: mat.NewDense(4, 4, []float64{
55				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
56				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
57				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
58				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
59			}),
60			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
61			epsilon:  1e-12,
62		},
63		{ // Truncated iris data to form wide matrix.
64			data: mat.NewDense(3, 4, []float64{
65				5.1, 3.5, 1.4, 0.2,
66				4.9, 3.0, 1.4, 0.2,
67				4.7, 3.2, 1.3, 0.2,
68			}),
69			wantVecs: mat.NewDense(4, 3, []float64{
70				-0.5705187254552365, -0.7505979435049239, 0.08084520834544455,
71				-0.8166537769529318, 0.5615147645527523, -0.032338083338177705,
72				-0.08709186238359454, -0.3482870890450082, -0.22636658336724505,
73				0, 0, -0.9701425001453315,
74			}),
75			wantVars: []float64{0.0844692361537822, 0.022197430512884326, 0},
76			epsilon:  1e-12,
77		},
78		{ // Truncated iris data transposed to check for operation on fat input.
79			data: mat.NewDense(10, 4, []float64{
80				5.1, 3.5, 1.4, 0.2,
81				4.9, 3.0, 1.4, 0.2,
82				4.7, 3.2, 1.3, 0.2,
83				4.6, 3.1, 1.5, 0.2,
84				5.0, 3.6, 1.4, 0.2,
85				5.4, 3.9, 1.7, 0.4,
86				4.6, 3.4, 1.4, 0.3,
87				5.0, 3.4, 1.5, 0.2,
88				4.4, 2.9, 1.4, 0.2,
89				4.9, 3.1, 1.5, 0.1,
90			}).T(),
91			wantVecs: mat.NewDense(10, 4, []float64{
92				-0.3366602459946619, -0.1373634006401213, 0.3465102523547623, -0.10290179303893479,
93				-0.31381852053861975, 0.5197145790632827, 0.5567296129086686, -0.15923062170153618,
94				-0.30857197637565165, -0.07670930360819002, 0.36159923003337235, 0.3342301027853355,
95				-0.29527124351656137, 0.16885455995353074, -0.5056204762881208, 0.32580913261444344,
96				-0.3327611073694004, -0.39365834489416474, 0.04900050959307464, 0.46812879383236555,
97				-0.34445484362044815, -0.2985206914561878, -0.1009714701361799, -0.16803618186050803,
98				-0.2986246350957691, -0.4222037823717799, -0.11838613462182519, -0.580283530375069,
99				-0.325911246223126, 0.024366468758217238, -0.12082035131864265, 0.16756027181337868,
100				-0.2814284432361538, 0.240812316260054, -0.24061437569068145, -0.365034616264623,
101				-0.31906138507685167, 0.4423912824105986, -0.2906412122303604, 0.027551046870337714,
102			}),
103			wantVars: []float64{41.8851906634233, 0.07762619213464989, 0.010516477775373585, 0},
104			epsilon:  1e-12,
105		},
106		{ // Truncated iris data unitary weights.
107			data: mat.NewDense(10, 4, []float64{
108				5.1, 3.5, 1.4, 0.2,
109				4.9, 3.0, 1.4, 0.2,
110				4.7, 3.2, 1.3, 0.2,
111				4.6, 3.1, 1.5, 0.2,
112				5.0, 3.6, 1.4, 0.2,
113				5.4, 3.9, 1.7, 0.4,
114				4.6, 3.4, 1.4, 0.3,
115				5.0, 3.4, 1.5, 0.2,
116				4.4, 2.9, 1.4, 0.2,
117				4.9, 3.1, 1.5, 0.1,
118			}),
119			weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
120			wantVecs: mat.NewDense(4, 4, []float64{
121				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
122				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
123				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
124				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
125			}),
126			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
127			epsilon:  1e-12,
128		},
129		{ // Truncated iris data non-unitary weights.
130			data: mat.NewDense(10, 4, []float64{
131				5.1, 3.5, 1.4, 0.2,
132				4.9, 3.0, 1.4, 0.2,
133				4.7, 3.2, 1.3, 0.2,
134				4.6, 3.1, 1.5, 0.2,
135				5.0, 3.6, 1.4, 0.2,
136				5.4, 3.9, 1.7, 0.4,
137				4.6, 3.4, 1.4, 0.3,
138				5.0, 3.4, 1.5, 0.2,
139				4.4, 2.9, 1.4, 0.2,
140				4.9, 3.1, 1.5, 0.1,
141			}),
142			weights: []float64{2, 3, 1, 1, 1, 1, 1, 1, 1, 2},
143			wantVecs: mat.NewDense(4, 4, []float64{
144				-0.618936145422414, 0.763069301531647, 0.124857741232537, 0.138035623677211,
145				-0.763958271606519, -0.603881770702898, 0.118267155321333, -0.194184052457746,
146				-0.143552119754944, 0.090014599564871, -0.942209377020044, -0.289018426115945,
147				-0.112599271966947, -0.212012782487076, -0.287515067921680, 0.927203898682805,
148			}),
149			wantVars: []float64{0.129621985550623, 0.022417487771598, 0.006454461065715, 0.002495076601075},
150			epsilon:  1e-12,
151		},
152	} {
153		var pc PC
154		vecs := &mat.Dense{}
155		var vars []float64
156		for j := 0; j < 2; j++ {
157			ok := pc.PrincipalComponents(test.data, test.weights)
158			pc.VectorsTo(vecs)
159			vars = pc.VarsTo(vars)
160			if !ok {
161				t.Errorf("unexpected SVD failure for test %d use %d", i, j)
162				continue tests
163			}
164
165			// Find the number of non-zero variances to handle
166			// non-uniqueness in SVD result (issue #21).
167			nnz := len(vars)
168			for k, v := range vars {
169				if math.Abs(v) < epsilon {
170					nnz = k
171					break
172				}
173			}
174			r, c := vecs.Dims()
175			if !mat.EqualApprox(vecs.Slice(0, r, 0, nnz), test.wantVecs.Slice(0, r, 0, nnz), test.epsilon) {
176				t.Errorf("%d use %d: unexpected PCA result got:\n%v\nwant:\n%v",
177					i, j, mat.Formatted(vecs), mat.Formatted(test.wantVecs))
178			}
179			if !approxEqual(vars, test.wantVars, test.epsilon) {
180				t.Errorf("%d use %d: unexpected variance result got:%v, want:%v",
181					i, j, vars, test.wantVars)
182			}
183
184			// Check that the set of principal vectors is
185			// orthonormal by comparing Vᵀ*V to the identity matrix.
186			I := mat.NewDiagDense(c, nil)
187			for k := 0; k < c; k++ {
188				I.SetDiag(k, 1)
189			}
190			var vv mat.Dense
191			vv.Mul(vecs.T(), vecs)
192			if !mat.EqualApprox(&vv, I, test.epsilon) {
193				t.Errorf("%d use %d: vectors not orthonormal\n%v", i, j, mat.Formatted(I))
194			}
195		}
196	}
197}
198
199func approxEqual(a, b []float64, epsilon float64) bool {
200	if len(a) != len(b) {
201		return false
202	}
203	for i, v := range a {
204		if !scalar.EqualWithinAbsOrRel(v, b[i], epsilon, epsilon) {
205			return false
206		}
207	}
208	return true
209}
210