1 /*
2  * Copyright 2008 Sandia Corporation.
3  * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
4  * license for use of this work by or on behalf of the
5  * U.S. Government. Redistribution and use in source and binary forms, with
6  * or without modification, are permitted provided that this Notice and any
7  * statement of authorship are reproduced on all copies.
8  */
9 // .SECTION Thanks
10 // Thanks to Philippe Pebay and David Thompson from Sandia National Laboratories
11 // for implementing this test.
12 // Test added for Robust PCA by Tristan Coulange, Kitware SAS 2013
13 
14 #include "vtkDoubleArray.h"
15 #include "vtkMultiBlockDataSet.h"
16 #include "vtkNew.h"
17 #include "vtkOrderStatistics.h"
18 #include "vtkPCAStatistics.h"
19 #include "vtkSmartPointer.h"
20 #include "vtkStringArray.h"
21 #include "vtkTable.h"
22 #include "vtkTestUtilities.h"
23 
24 #include "vtksys/SystemTools.hxx"
25 
26 // Perform a fuzzy compare of floats/doubles
27 template <class A>
fuzzyCompare(A a,A b)28 bool fuzzyCompare(A a, A b)
29 {
30   //  return fabs(a - b) < std::numeric_limits<A>::epsilon();
31   return fabs(a - b) < .0001;
32 }
33 
34 int TestPCA(int argc, char* argv[]);
35 int TestPCARobust(int argc, char* argv[]);
36 int TestPCAPart(int argc, char* argv[], bool RobustPCA);
37 int TestPCARobust2();
38 int TestEigen();
39 
40 //=============================================================================
TestPCAStatistics(int argc,char * argv[])41 int TestPCAStatistics(int argc, char* argv[])
42 {
43   int result = EXIT_SUCCESS;
44 
45   result |= TestPCA(argc, argv);
46   result |= TestPCARobust(argc, argv);
47   result |= TestPCARobust2();
48   result |= TestEigen();
49 
50   if (result == EXIT_FAILURE)
51   {
52     cout << "FAILURE" << endl;
53   }
54   else
55   {
56     cout << "SUCCESS" << endl;
57   }
58 
59   return result;
60 }
61 
62 //=============================================================================
TestPCA(int argc,char * argv[])63 int TestPCA(int argc, char* argv[])
64 {
65   return TestPCAPart(argc, argv, false);
66 }
67 
68 //=============================================================================
TestPCARobust(int argc,char * argv[])69 int TestPCARobust(int argc, char* argv[])
70 {
71   return TestPCAPart(argc, argv, true);
72 }
73 
74 //=============================================================================
TestPCARobust2()75 int TestPCARobust2()
76 {
77   const int nVals = 7;
78   double mingledData[] = {
79     0., 1.,  //
80     1., 1.,  //
81     2., 1.,  //
82     3., 1.,  //
83     4., 1.,  //
84     5., 1.,  //
85     10., 10. //
86   };
87 
88   const char m0Name[] = "M0";
89   vtkNew<vtkDoubleArray> dataset1Arr;
90   dataset1Arr->SetNumberOfComponents(1);
91   dataset1Arr->SetName(m0Name);
92 
93   const char m1Name[] = "M1";
94   vtkNew<vtkDoubleArray> dataset2Arr;
95   dataset2Arr->SetNumberOfComponents(1);
96   dataset2Arr->SetName(m1Name);
97 
98   for (int i = 0; i < nVals; ++i)
99   {
100     dataset1Arr->InsertNextValue(mingledData[i * 2]);
101     dataset2Arr->InsertNextValue(mingledData[i * 2 + 1]);
102   }
103 
104   vtkNew<vtkTable> datasetTable;
105   datasetTable->AddColumn(dataset1Arr);
106   datasetTable->AddColumn(dataset2Arr);
107 
108   // Set PCA statistics algorithm and its input data port
109   vtkNew<vtkPCAStatistics> pcas;
110 
111   // Prepare first test with data
112   pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
113   pcas->MedianAbsoluteDeviationOn();
114 
115   // -- Select Column Pairs of Interest ( Learn Mode ) --
116   pcas->SetColumnStatus(m0Name, 1);
117   pcas->SetColumnStatus(m1Name, 1);
118 
119   // Test all options but Assess
120   pcas->SetLearnOption(true);
121   pcas->SetDeriveOption(true);
122   pcas->SetTestOption(true);
123   pcas->SetAssessOption(true);
124   pcas->Update();
125 
126   vtkTable* outputData = pcas->GetOutput();
127 
128   double res[] = { -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0 };
129 
130   for (vtkIdType j = 0; j < 2; j++)
131   {
132     for (vtkIdType i = 0; i < 7; i++)
133     {
134       if (outputData->GetValue(i, j + 2) != res[j * outputData->GetNumberOfRows() + i])
135       {
136         return EXIT_FAILURE;
137       }
138     }
139   }
140 
141   return EXIT_SUCCESS;
142 }
143 
144 //=============================================================================
TestPCAPart(int argc,char * argv[],bool robustPCA)145 int TestPCAPart(int argc, char* argv[], bool robustPCA)
146 {
147   char* normScheme = vtkTestUtilities::GetArgOrEnvOrDefault(
148     "-normalize-covariance", argc, argv, "VTK_NORMALIZE_COVARIANCE", "None");
149   int testStatus = 0;
150 
151   /* */
152   double mingledData[] = {
153     46, 45, //
154     47, 49, //
155     46, 47, //
156     46, 46, //
157     47, 46, //
158     47, 49, //
159     49, 49, //
160     47, 45, //
161     50, 50, //
162     46, 46, //
163     51, 50, //
164     48, 48, //
165     52, 54, //
166     48, 47, //
167     52, 52, //
168     49, 49, //
169     53, 54, //
170     50, 50, //
171     53, 54, //
172     50, 52, //
173     53, 53, //
174     50, 51, //
175     54, 54, //
176     49, 49, //
177     52, 52, //
178     50, 51, //
179     52, 52, //
180     49, 47, //
181     48, 48, //
182     48, 50, //
183     46, 48, //
184     47, 47  //
185   };
186   int nVals = 32;
187 
188   const char m0Name[] = "M0";
189   vtkDoubleArray* dataset1Arr = vtkDoubleArray::New();
190   dataset1Arr->SetNumberOfComponents(1);
191   dataset1Arr->SetName(m0Name);
192 
193   const char m1Name[] = "M1";
194   vtkDoubleArray* dataset2Arr = vtkDoubleArray::New();
195   dataset2Arr->SetNumberOfComponents(1);
196   dataset2Arr->SetName(m1Name);
197 
198   const char m2Name[] = "M2";
199   vtkDoubleArray* dataset3Arr = vtkDoubleArray::New();
200   dataset3Arr->SetNumberOfComponents(1);
201   dataset3Arr->SetName(m2Name);
202 
203   for (int i = 0; i < nVals; ++i)
204   {
205     int ti = i << 1;
206     dataset1Arr->InsertNextValue(mingledData[ti]);
207     dataset2Arr->InsertNextValue(mingledData[ti + 1]);
208     dataset3Arr->InsertNextValue(i != 12 ? -1. : -1.001);
209   }
210 
211   vtkTable* datasetTable = vtkTable::New();
212   datasetTable->AddColumn(dataset1Arr);
213   dataset1Arr->Delete();
214   datasetTable->AddColumn(dataset2Arr);
215   dataset2Arr->Delete();
216   datasetTable->AddColumn(dataset3Arr);
217   dataset3Arr->Delete();
218 
219   // Set PCA statistics algorithm and its input data port
220   vtkPCAStatistics* pcas = vtkPCAStatistics::New();
221   pcas->SetMedianAbsoluteDeviation(robustPCA);
222 
223   // First verify that absence of input does not cause trouble
224   cout << "## Verifying that absence of input does not cause trouble... ";
225   pcas->Update();
226   cout << "done.\n";
227 
228   // Prepare first test with data
229   pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
230   pcas->SetNormalizationSchemeByName(normScheme);
231   pcas->SetBasisSchemeByName("FixedBasisEnergy");
232   pcas->SetFixedBasisEnergy(1. - 1e-8);
233 
234   datasetTable->Delete();
235 
236   // -- Select Column Pairs of Interest ( Learn Mode ) --
237   pcas->SetColumnStatus(m0Name, 1);
238   pcas->SetColumnStatus(m1Name, 1);
239   pcas->RequestSelectedColumns();
240   pcas->ResetAllColumnStates();
241   pcas->SetColumnStatus(m0Name, 1);
242   pcas->SetColumnStatus(m1Name, 1);
243   pcas->SetColumnStatus(m2Name, 1);
244   pcas->SetColumnStatus(m2Name, 0);
245   pcas->SetColumnStatus(m2Name, 1);
246   pcas->RequestSelectedColumns();
247   pcas->RequestSelectedColumns(); // Try a duplicate entry. This should have no effect.
248   pcas->SetColumnStatus(m0Name, 0);
249   pcas->SetColumnStatus(m2Name, 0);
250   pcas->SetColumnStatus("Metric 3",
251     1); // An invalid name. This should result in a request for metric 1's self-correlation.
252   // pcas->RequestSelectedColumns(); will get called in RequestData()
253 
254   // Test all options but Assess
255   pcas->SetLearnOption(true);
256   pcas->SetDeriveOption(true);
257   pcas->SetTestOption(true);
258   pcas->SetAssessOption(false);
259   pcas->Update();
260 
261   vtkMultiBlockDataSet* outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
262     pcas->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
263   vtkTable* outputTest = pcas->GetOutput(vtkStatisticsAlgorithm::OUTPUT_TEST);
264 
265   cout << "## Calculated the following statistics for data set:\n";
266   for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
267   {
268     vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
269 
270     if (b == 0)
271     {
272       cout << "Primary Statistics\n";
273     }
274     else
275     {
276       cout << "Derived Statistics " << (b - 1) << "\n";
277     }
278 
279     outputMeta->Dump();
280   }
281 
282   // Check some results of the Test option
283   cout << "\n## Calculated the following Jarque-Bera-Srivastava statistics for pseudo-random "
284           "variables (n="
285        << nVals;
286 
287 #ifdef USE_GNU_R
288   int nNonGaussian = 1;
289   int nRejected = 0;
290   double alpha = .01;
291 
292   cout << ", null hypothesis: binormality, significance level=" << alpha;
293 #endif // USE_GNU_R
294 
295   cout << "):\n";
296 
297   // Loop over Test table
298   for (vtkIdType r = 0; r < outputTest->GetNumberOfRows(); ++r)
299   {
300     cout << "   ";
301     for (int i = 0; i < outputTest->GetNumberOfColumns(); ++i)
302     {
303       cout << outputTest->GetColumnName(i) << "=" << outputTest->GetValue(r, i).ToString() << "  ";
304     }
305 
306 #ifdef USE_GNU_R
307     // Check if null hypothesis is rejected at specified significance level
308     double p = outputTest->GetValueByName(r, "P").ToDouble();
309     // Must verify that p value is valid (it is set to -1 if R has failed)
310     if (p > -1 && p < alpha)
311     {
312       cout << "N.H. rejected";
313 
314       ++nRejected;
315     }
316 #endif // USE_GNU_R
317 
318     cout << "\n";
319   }
320 
321 #ifdef USE_GNU_R
322   if (nRejected < nNonGaussian)
323   {
324     vtkGenericWarningMacro("Rejected only "
325       << nRejected << " null hypotheses of binormality whereas " << nNonGaussian
326       << " variable pairs are not Gaussian");
327     testStatus = 1;
328   }
329 #endif // USE_GNU_R
330 
331   // Test Assess option
332   vtkMultiBlockDataSet* paramsTables = vtkMultiBlockDataSet::New();
333   paramsTables->ShallowCopy(outputMetaDS);
334 
335   pcas->SetInputData(vtkStatisticsAlgorithm::INPUT_MODEL, paramsTables);
336   paramsTables->Delete();
337 
338   // Test Assess only (Do not recalculate nor rederive nor retest a model)
339   pcas->SetLearnOption(false);
340   pcas->SetDeriveOption(false);
341   pcas->SetTestOption(false);
342   pcas->SetAssessOption(true);
343   pcas->Update();
344 
345   cout << "\n## Assessment results:\n";
346   vtkTable* outputData = pcas->GetOutput();
347   outputData->Dump();
348 
349   pcas->Delete();
350   delete[] normScheme;
351 
352   return testStatus;
353 }
354 
TestEigen()355 int TestEigen()
356 {
357   const char m0Name[] = "M0";
358   vtkSmartPointer<vtkDoubleArray> dataset1Arr = vtkSmartPointer<vtkDoubleArray>::New();
359   dataset1Arr->SetNumberOfComponents(1);
360   dataset1Arr->SetName(m0Name);
361   dataset1Arr->InsertNextValue(0);
362   dataset1Arr->InsertNextValue(1);
363   dataset1Arr->InsertNextValue(0);
364 
365   const char m1Name[] = "M1";
366   vtkSmartPointer<vtkDoubleArray> dataset2Arr = vtkSmartPointer<vtkDoubleArray>::New();
367   dataset2Arr->SetNumberOfComponents(1);
368   dataset2Arr->SetName(m1Name);
369   dataset2Arr->InsertNextValue(0);
370   dataset2Arr->InsertNextValue(0);
371   dataset2Arr->InsertNextValue(1);
372 
373   const char m2Name[] = "M2";
374   vtkSmartPointer<vtkDoubleArray> dataset3Arr = vtkSmartPointer<vtkDoubleArray>::New();
375   dataset3Arr->SetNumberOfComponents(1);
376   dataset3Arr->SetName(m2Name);
377   dataset3Arr->InsertNextValue(0);
378   dataset3Arr->InsertNextValue(0);
379   dataset3Arr->InsertNextValue(0);
380 
381   vtkSmartPointer<vtkTable> datasetTable = vtkSmartPointer<vtkTable>::New();
382   datasetTable->AddColumn(dataset1Arr);
383   datasetTable->AddColumn(dataset2Arr);
384   datasetTable->AddColumn(dataset3Arr);
385 
386   vtkSmartPointer<vtkPCAStatistics> pcaStatistics = vtkSmartPointer<vtkPCAStatistics>::New();
387   pcaStatistics->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, datasetTable);
388 
389   pcaStatistics->SetColumnStatus("M0", 1);
390   pcaStatistics->SetColumnStatus("M1", 1);
391   pcaStatistics->SetColumnStatus("M2", 1);
392   pcaStatistics->RequestSelectedColumns();
393 
394   pcaStatistics->SetDeriveOption(true);
395 
396   pcaStatistics->Update();
397 
398   vtkSmartPointer<vtkMultiBlockDataSet> outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
399     pcaStatistics->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
400 
401   vtkSmartPointer<vtkTable> outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(1));
402 
403   outputMeta->Dump();
404 
405   ///////// Eigenvalues ////////////
406   vtkSmartPointer<vtkDoubleArray> eigenvalues = vtkSmartPointer<vtkDoubleArray>::New();
407   pcaStatistics->GetEigenvalues(eigenvalues);
408   double eigenvaluesGroundTruth[3] = { .5, .166667, 0 };
409   vtkIdType eigenvaluesCount = eigenvalues->GetNumberOfTuples();
410   if (eigenvaluesCount > 3)
411   {
412     return EXIT_FAILURE;
413   }
414   for (vtkIdType i = 0; i < eigenvaluesCount; i++)
415   {
416     std::cout << "Eigenvalue " << i << " = " << eigenvalues->GetValue(i) << std::endl;
417     if (!fuzzyCompare(eigenvalues->GetValue(i), eigenvaluesGroundTruth[i]))
418     {
419       std::cerr << "Eigenvalues (GetEigenvalues) are not correct! (" << eigenvalues->GetValue(i)
420                 << " vs " << eigenvaluesGroundTruth[i] << ")" << std::endl;
421       return EXIT_FAILURE;
422     }
423 
424     if (!fuzzyCompare(pcaStatistics->GetEigenvalue(i), eigenvaluesGroundTruth[i]))
425     {
426       std::cerr << "Eigenvalues (GetEigenvalue) are not correct! ("
427                 << pcaStatistics->GetEigenvalue(i) << " vs " << eigenvaluesGroundTruth[i] << ")"
428                 << std::endl;
429       return EXIT_FAILURE;
430     }
431   }
432 
433   std::vector<std::vector<double>> eigenvectorsGroundTruth;
434   std::vector<double> e0(3);
435   e0[0] = -.707107;
436   e0[1] = .707107;
437   e0[2] = 0;
438   std::vector<double> e1(3);
439   e1[0] = .707107;
440   e1[1] = .707107;
441   e1[2] = 0;
442   std::vector<double> e2(3);
443   e2[0] = 0;
444   e2[1] = 0;
445   e2[2] = 1;
446   eigenvectorsGroundTruth.push_back(e0);
447   eigenvectorsGroundTruth.push_back(e1);
448   eigenvectorsGroundTruth.push_back(e2);
449 
450   vtkSmartPointer<vtkDoubleArray> eigenvectors = vtkSmartPointer<vtkDoubleArray>::New();
451 
452   pcaStatistics->GetEigenvectors(eigenvectors);
453   for (vtkIdType i = 0; i < eigenvectors->GetNumberOfTuples(); i++)
454   {
455     std::cout << "Eigenvector " << i << " : ";
456     double* evec = new double[eigenvectors->GetNumberOfComponents()];
457     eigenvectors->GetTuple(i, evec);
458     int iamax = 0;
459     double vamax = fabs(eigenvectorsGroundTruth[i][0]);
460     for (vtkIdType j = 1; j < eigenvectors->GetNumberOfComponents(); j++)
461     {
462       double tmp = fabs(eigenvectorsGroundTruth[i][j]);
463       if (tmp > vamax)
464       {
465         iamax = j;
466         vamax = tmp;
467       }
468     }
469     double factor = (vamax == eigenvectorsGroundTruth[i][iamax]) ? +1 : -1;
470     for (vtkIdType j = 0; j < eigenvectors->GetNumberOfComponents(); j++)
471     {
472       std::cout << evec[j] << " ";
473       vtkSmartPointer<vtkDoubleArray> eigenvectorSingle = vtkSmartPointer<vtkDoubleArray>::New();
474       pcaStatistics->GetEigenvector(i, eigenvectorSingle);
475       if (!fuzzyCompare(factor * eigenvectorsGroundTruth[i][j], evec[j]) ||
476         !fuzzyCompare(factor * eigenvectorsGroundTruth[i][j], eigenvectorSingle->GetValue(j)))
477       {
478         std::cerr << "Eigenvectors do not match!" << std::endl;
479         return EXIT_FAILURE;
480       }
481     }
482     delete[] evec;
483     std::cout << std::endl;
484   }
485 
486   return EXIT_SUCCESS;
487 }
488