1 /*
2 * Copyright 2008 Sandia Corporation.
3 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
4 * license for use of this work by or on behalf of the
5 * U.S. Government. Redistribution and use in source and binary forms, with
6 * or without modification, are permitted provided that this Notice and any
7 * statement of authorship are reproduced on all copies.
8 */
9 // .SECTION Thanks
10 // Thanks to Philippe Pebay and David Thompson from Sandia National Laboratories
11 // for implementing this test.
12 // Test added for Robust PCA by Tristan Coulange, Kitware SAS 2013
13
14 #include "vtkDoubleArray.h"
15 #include "vtkMultiBlockDataSet.h"
16 #include "vtkNew.h"
17 #include "vtkOrderStatistics.h"
18 #include "vtkPCAStatistics.h"
19 #include "vtkSmartPointer.h"
20 #include "vtkStringArray.h"
21 #include "vtkTable.h"
22 #include "vtkTestUtilities.h"
23
24 #include "vtksys/SystemTools.hxx"
25
26 // Perform a fuzzy compare of floats/doubles
27 template <class A>
fuzzyCompare(A a,A b)28 bool fuzzyCompare(A a, A b)
29 {
30 // return fabs(a - b) < std::numeric_limits<A>::epsilon();
31 return fabs(a - b) < .0001;
32 }
33
34 int TestPCA(int argc, char* argv[]);
35 int TestPCARobust(int argc, char* argv[]);
36 int TestPCAPart(int argc, char* argv[], bool RobustPCA);
37 int TestPCARobust2();
38 int TestEigen();
39
40 //=============================================================================
TestPCAStatistics(int argc,char * argv[])41 int TestPCAStatistics(int argc, char* argv[])
42 {
43 int result = EXIT_SUCCESS;
44
45 result |= TestPCA(argc, argv);
46 result |= TestPCARobust(argc, argv);
47 result |= TestPCARobust2();
48 result |= TestEigen();
49
50 if (result == EXIT_FAILURE)
51 {
52 cout << "FAILURE" << endl;
53 }
54 else
55 {
56 cout << "SUCCESS" << endl;
57 }
58
59 return result;
60 }
61
62 //=============================================================================
TestPCA(int argc,char * argv[])63 int TestPCA(int argc, char* argv[])
64 {
65 return TestPCAPart(argc, argv, false);
66 }
67
68 //=============================================================================
TestPCARobust(int argc,char * argv[])69 int TestPCARobust(int argc, char* argv[])
70 {
71 return TestPCAPart(argc, argv, true);
72 }
73
74 //=============================================================================
TestPCARobust2()75 int TestPCARobust2()
76 {
77 const int nVals = 7;
78 double mingledData[] = {
79 0., 1., //
80 1., 1., //
81 2., 1., //
82 3., 1., //
83 4., 1., //
84 5., 1., //
85 10., 10. //
86 };
87
88 const char m0Name[] = "M0";
89 vtkNew<vtkDoubleArray> dataset1Arr;
90 dataset1Arr->SetNumberOfComponents(1);
91 dataset1Arr->SetName(m0Name);
92
93 const char m1Name[] = "M1";
94 vtkNew<vtkDoubleArray> dataset2Arr;
95 dataset2Arr->SetNumberOfComponents(1);
96 dataset2Arr->SetName(m1Name);
97
98 for (int i = 0; i < nVals; ++i)
99 {
100 dataset1Arr->InsertNextValue(mingledData[i * 2]);
101 dataset2Arr->InsertNextValue(mingledData[i * 2 + 1]);
102 }
103
104 vtkNew<vtkTable> datasetTable;
105 datasetTable->AddColumn(dataset1Arr);
106 datasetTable->AddColumn(dataset2Arr);
107
108 // Set PCA statistics algorithm and its input data port
109 vtkNew<vtkPCAStatistics> pcas;
110
111 // Prepare first test with data
112 pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
113 pcas->MedianAbsoluteDeviationOn();
114
115 // -- Select Column Pairs of Interest ( Learn Mode ) --
116 pcas->SetColumnStatus(m0Name, 1);
117 pcas->SetColumnStatus(m1Name, 1);
118
119 // Test all options but Assess
120 pcas->SetLearnOption(true);
121 pcas->SetDeriveOption(true);
122 pcas->SetTestOption(true);
123 pcas->SetAssessOption(true);
124 pcas->Update();
125
126 vtkTable* outputData = pcas->GetOutput();
127
128 double res[] = { -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0 };
129
130 for (vtkIdType j = 0; j < 2; j++)
131 {
132 for (vtkIdType i = 0; i < 7; i++)
133 {
134 if (outputData->GetValue(i, j + 2) != res[j * outputData->GetNumberOfRows() + i])
135 {
136 return EXIT_FAILURE;
137 }
138 }
139 }
140
141 return EXIT_SUCCESS;
142 }
143
144 //=============================================================================
TestPCAPart(int argc,char * argv[],bool robustPCA)145 int TestPCAPart(int argc, char* argv[], bool robustPCA)
146 {
147 char* normScheme = vtkTestUtilities::GetArgOrEnvOrDefault(
148 "-normalize-covariance", argc, argv, "VTK_NORMALIZE_COVARIANCE", "None");
149 int testStatus = 0;
150
151 /* */
152 double mingledData[] = {
153 46, 45, //
154 47, 49, //
155 46, 47, //
156 46, 46, //
157 47, 46, //
158 47, 49, //
159 49, 49, //
160 47, 45, //
161 50, 50, //
162 46, 46, //
163 51, 50, //
164 48, 48, //
165 52, 54, //
166 48, 47, //
167 52, 52, //
168 49, 49, //
169 53, 54, //
170 50, 50, //
171 53, 54, //
172 50, 52, //
173 53, 53, //
174 50, 51, //
175 54, 54, //
176 49, 49, //
177 52, 52, //
178 50, 51, //
179 52, 52, //
180 49, 47, //
181 48, 48, //
182 48, 50, //
183 46, 48, //
184 47, 47 //
185 };
186 int nVals = 32;
187
188 const char m0Name[] = "M0";
189 vtkDoubleArray* dataset1Arr = vtkDoubleArray::New();
190 dataset1Arr->SetNumberOfComponents(1);
191 dataset1Arr->SetName(m0Name);
192
193 const char m1Name[] = "M1";
194 vtkDoubleArray* dataset2Arr = vtkDoubleArray::New();
195 dataset2Arr->SetNumberOfComponents(1);
196 dataset2Arr->SetName(m1Name);
197
198 const char m2Name[] = "M2";
199 vtkDoubleArray* dataset3Arr = vtkDoubleArray::New();
200 dataset3Arr->SetNumberOfComponents(1);
201 dataset3Arr->SetName(m2Name);
202
203 for (int i = 0; i < nVals; ++i)
204 {
205 int ti = i << 1;
206 dataset1Arr->InsertNextValue(mingledData[ti]);
207 dataset2Arr->InsertNextValue(mingledData[ti + 1]);
208 dataset3Arr->InsertNextValue(i != 12 ? -1. : -1.001);
209 }
210
211 vtkTable* datasetTable = vtkTable::New();
212 datasetTable->AddColumn(dataset1Arr);
213 dataset1Arr->Delete();
214 datasetTable->AddColumn(dataset2Arr);
215 dataset2Arr->Delete();
216 datasetTable->AddColumn(dataset3Arr);
217 dataset3Arr->Delete();
218
219 // Set PCA statistics algorithm and its input data port
220 vtkPCAStatistics* pcas = vtkPCAStatistics::New();
221 pcas->SetMedianAbsoluteDeviation(robustPCA);
222
223 // First verify that absence of input does not cause trouble
224 cout << "## Verifying that absence of input does not cause trouble... ";
225 pcas->Update();
226 cout << "done.\n";
227
228 // Prepare first test with data
229 pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
230 pcas->SetNormalizationSchemeByName(normScheme);
231 pcas->SetBasisSchemeByName("FixedBasisEnergy");
232 pcas->SetFixedBasisEnergy(1. - 1e-8);
233
234 datasetTable->Delete();
235
236 // -- Select Column Pairs of Interest ( Learn Mode ) --
237 pcas->SetColumnStatus(m0Name, 1);
238 pcas->SetColumnStatus(m1Name, 1);
239 pcas->RequestSelectedColumns();
240 pcas->ResetAllColumnStates();
241 pcas->SetColumnStatus(m0Name, 1);
242 pcas->SetColumnStatus(m1Name, 1);
243 pcas->SetColumnStatus(m2Name, 1);
244 pcas->SetColumnStatus(m2Name, 0);
245 pcas->SetColumnStatus(m2Name, 1);
246 pcas->RequestSelectedColumns();
247 pcas->RequestSelectedColumns(); // Try a duplicate entry. This should have no effect.
248 pcas->SetColumnStatus(m0Name, 0);
249 pcas->SetColumnStatus(m2Name, 0);
250 pcas->SetColumnStatus("Metric 3",
251 1); // An invalid name. This should result in a request for metric 1's self-correlation.
252 // pcas->RequestSelectedColumns(); will get called in RequestData()
253
254 // Test all options but Assess
255 pcas->SetLearnOption(true);
256 pcas->SetDeriveOption(true);
257 pcas->SetTestOption(true);
258 pcas->SetAssessOption(false);
259 pcas->Update();
260
261 vtkMultiBlockDataSet* outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
262 pcas->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
263 vtkTable* outputTest = pcas->GetOutput(vtkStatisticsAlgorithm::OUTPUT_TEST);
264
265 cout << "## Calculated the following statistics for data set:\n";
266 for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
267 {
268 vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
269
270 if (b == 0)
271 {
272 cout << "Primary Statistics\n";
273 }
274 else
275 {
276 cout << "Derived Statistics " << (b - 1) << "\n";
277 }
278
279 outputMeta->Dump();
280 }
281
282 // Check some results of the Test option
283 cout << "\n## Calculated the following Jarque-Bera-Srivastava statistics for pseudo-random "
284 "variables (n="
285 << nVals;
286
287 #ifdef USE_GNU_R
288 int nNonGaussian = 1;
289 int nRejected = 0;
290 double alpha = .01;
291
292 cout << ", null hypothesis: binormality, significance level=" << alpha;
293 #endif // USE_GNU_R
294
295 cout << "):\n";
296
297 // Loop over Test table
298 for (vtkIdType r = 0; r < outputTest->GetNumberOfRows(); ++r)
299 {
300 cout << " ";
301 for (int i = 0; i < outputTest->GetNumberOfColumns(); ++i)
302 {
303 cout << outputTest->GetColumnName(i) << "=" << outputTest->GetValue(r, i).ToString() << " ";
304 }
305
306 #ifdef USE_GNU_R
307 // Check if null hypothesis is rejected at specified significance level
308 double p = outputTest->GetValueByName(r, "P").ToDouble();
309 // Must verify that p value is valid (it is set to -1 if R has failed)
310 if (p > -1 && p < alpha)
311 {
312 cout << "N.H. rejected";
313
314 ++nRejected;
315 }
316 #endif // USE_GNU_R
317
318 cout << "\n";
319 }
320
321 #ifdef USE_GNU_R
322 if (nRejected < nNonGaussian)
323 {
324 vtkGenericWarningMacro("Rejected only "
325 << nRejected << " null hypotheses of binormality whereas " << nNonGaussian
326 << " variable pairs are not Gaussian");
327 testStatus = 1;
328 }
329 #endif // USE_GNU_R
330
331 // Test Assess option
332 vtkMultiBlockDataSet* paramsTables = vtkMultiBlockDataSet::New();
333 paramsTables->ShallowCopy(outputMetaDS);
334
335 pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_MODEL, paramsTables);
336 paramsTables->Delete();
337
338 // Test Assess only (Do not recalculate nor rederive nor retest a model)
339 pcas->SetLearnOption(false);
340 pcas->SetDeriveOption(false);
341 pcas->SetTestOption(false);
342 pcas->SetAssessOption(true);
343 pcas->Update();
344
345 cout << "\n## Assessment results:\n";
346 vtkTable* outputData = pcas->GetOutput();
347 outputData->Dump();
348
349 pcas->Delete();
350 delete[] normScheme;
351
352 return testStatus;
353 }
354
TestEigen()355 int TestEigen()
356 {
357 const char m0Name[] = "M0";
358 vtkSmartPointer<vtkDoubleArray> dataset1Arr = vtkSmartPointer<vtkDoubleArray>::New();
359 dataset1Arr->SetNumberOfComponents(1);
360 dataset1Arr->SetName(m0Name);
361 dataset1Arr->InsertNextValue(0);
362 dataset1Arr->InsertNextValue(1);
363 dataset1Arr->InsertNextValue(0);
364
365 const char m1Name[] = "M1";
366 vtkSmartPointer<vtkDoubleArray> dataset2Arr = vtkSmartPointer<vtkDoubleArray>::New();
367 dataset2Arr->SetNumberOfComponents(1);
368 dataset2Arr->SetName(m1Name);
369 dataset2Arr->InsertNextValue(0);
370 dataset2Arr->InsertNextValue(0);
371 dataset2Arr->InsertNextValue(1);
372
373 const char m2Name[] = "M2";
374 vtkSmartPointer<vtkDoubleArray> dataset3Arr = vtkSmartPointer<vtkDoubleArray>::New();
375 dataset3Arr->SetNumberOfComponents(1);
376 dataset3Arr->SetName(m2Name);
377 dataset3Arr->InsertNextValue(0);
378 dataset3Arr->InsertNextValue(0);
379 dataset3Arr->InsertNextValue(0);
380
381 vtkSmartPointer<vtkTable> datasetTable = vtkSmartPointer<vtkTable>::New();
382 datasetTable->AddColumn(dataset1Arr);
383 datasetTable->AddColumn(dataset2Arr);
384 datasetTable->AddColumn(dataset3Arr);
385
386 vtkSmartPointer<vtkPCAStatistics> pcaStatistics = vtkSmartPointer<vtkPCAStatistics>::New();
387 pcaStatistics->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
388
389 pcaStatistics->SetColumnStatus("M0", 1);
390 pcaStatistics->SetColumnStatus("M1", 1);
391 pcaStatistics->SetColumnStatus("M2", 1);
392 pcaStatistics->RequestSelectedColumns();
393
394 pcaStatistics->SetDeriveOption(true);
395
396 pcaStatistics->Update();
397
398 vtkSmartPointer<vtkMultiBlockDataSet> outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
399 pcaStatistics->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
400
401 vtkSmartPointer<vtkTable> outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(1));
402
403 outputMeta->Dump();
404
405 ///////// Eigenvalues ////////////
406 vtkSmartPointer<vtkDoubleArray> eigenvalues = vtkSmartPointer<vtkDoubleArray>::New();
407 pcaStatistics->GetEigenvalues(eigenvalues);
408 double eigenvaluesGroundTruth[3] = { .5, .166667, 0 };
409 vtkIdType eigenvaluesCount = eigenvalues->GetNumberOfTuples();
410 if (eigenvaluesCount > 3)
411 {
412 return EXIT_FAILURE;
413 }
414 for (vtkIdType i = 0; i < eigenvaluesCount; i++)
415 {
416 std::cout << "Eigenvalue " << i << " = " << eigenvalues->GetValue(i) << std::endl;
417 if (!fuzzyCompare(eigenvalues->GetValue(i), eigenvaluesGroundTruth[i]))
418 {
419 std::cerr << "Eigenvalues (GetEigenvalues) are not correct! (" << eigenvalues->GetValue(i)
420 << " vs " << eigenvaluesGroundTruth[i] << ")" << std::endl;
421 return EXIT_FAILURE;
422 }
423
424 if (!fuzzyCompare(pcaStatistics->GetEigenvalue(i), eigenvaluesGroundTruth[i]))
425 {
426 std::cerr << "Eigenvalues (GetEigenvalue) are not correct! ("
427 << pcaStatistics->GetEigenvalue(i) << " vs " << eigenvaluesGroundTruth[i] << ")"
428 << std::endl;
429 return EXIT_FAILURE;
430 }
431 }
432
433 std::vector<std::vector<double>> eigenvectorsGroundTruth;
434 std::vector<double> e0(3);
435 e0[0] = -.707107;
436 e0[1] = .707107;
437 e0[2] = 0;
438 std::vector<double> e1(3);
439 e1[0] = .707107;
440 e1[1] = .707107;
441 e1[2] = 0;
442 std::vector<double> e2(3);
443 e2[0] = 0;
444 e2[1] = 0;
445 e2[2] = 1;
446 eigenvectorsGroundTruth.push_back(e0);
447 eigenvectorsGroundTruth.push_back(e1);
448 eigenvectorsGroundTruth.push_back(e2);
449
450 vtkSmartPointer<vtkDoubleArray> eigenvectors = vtkSmartPointer<vtkDoubleArray>::New();
451
452 pcaStatistics->GetEigenvectors(eigenvectors);
453 for (vtkIdType i = 0; i < eigenvectors->GetNumberOfTuples(); i++)
454 {
455 std::cout << "Eigenvector " << i << " : ";
456 double* evec = new double[eigenvectors->GetNumberOfComponents()];
457 eigenvectors->GetTuple(i, evec);
458 int iamax = 0;
459 double vamax = fabs(eigenvectorsGroundTruth[i][0]);
460 for (vtkIdType j = 1; j < eigenvectors->GetNumberOfComponents(); j++)
461 {
462 double tmp = fabs(eigenvectorsGroundTruth[i][j]);
463 if (tmp > vamax)
464 {
465 iamax = j;
466 vamax = tmp;
467 }
468 }
469 double factor = (vamax == eigenvectorsGroundTruth[i][iamax]) ? +1 : -1;
470 for (vtkIdType j = 0; j < eigenvectors->GetNumberOfComponents(); j++)
471 {
472 std::cout << evec[j] << " ";
473 vtkSmartPointer<vtkDoubleArray> eigenvectorSingle = vtkSmartPointer<vtkDoubleArray>::New();
474 pcaStatistics->GetEigenvector(i, eigenvectorSingle);
475 if (!fuzzyCompare(factor * eigenvectorsGroundTruth[i][j], evec[j]) ||
476 !fuzzyCompare(factor * eigenvectorsGroundTruth[i][j], eigenvectorSingle->GetValue(j)))
477 {
478 std::cerr << "Eigenvectors do not match!" << std::endl;
479 return EXIT_FAILURE;
480 }
481 }
482 delete[] evec;
483 std::cout << std::endl;
484 }
485
486 return EXIT_SUCCESS;
487 }
488