1 /*
2  * Copyright 2008 Sandia Corporation.
3  * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
4  * license for use of this work by or on behalf of the
5  * U.S. Government. Redistribution and use in source and binary forms, with
6  * or without modification, are permitted provided that this Notice and any
7  * statement of authorship are reproduced on all copies.
8  */
9 // .SECTION Thanks
10 // Thanks to Janine Bennett, Philippe Pebay, and David Thompson from Sandia National Laboratories
11 // for implementing this test.
12 
13 #include "vtkDoubleArray.h"
14 #include "vtkIdTypeArray.h"
15 #include "vtkKMeansStatistics.h"
16 #include "vtkMath.h"
17 #include "vtkMultiBlockDataSet.h"
18 #include "vtkStdString.h"
19 #include "vtkStringArray.h"
20 #include "vtkTable.h"
21 #include "vtkTimerLog.h"
22 
23 #include <sstream>
24 
25 //=============================================================================
TestKMeansStatistics(int,char * [])26 int TestKMeansStatistics(int, char*[])
27 {
28   int testStatus = 0;
29 
30   const int nDim = 4;
31   int nVals = 50;
32 
33   // Seed random number generator
34   vtkMath::RandomSeed(static_cast<int>(vtkTimerLog::GetUniversalTime()));
35 
36   // Generate an input table that contains samples of mutually independent random variables over [0,
37   // 1]
38   vtkTable* inputData = vtkTable::New();
39   vtkDoubleArray* doubleArray;
40 
41   int numComponents = 1;
42   for (int c = 0; c < nDim; ++c)
43   {
44     std::ostringstream colName;
45     colName << "coord " << c;
46     doubleArray = vtkDoubleArray::New();
47     doubleArray->SetNumberOfComponents(numComponents);
48     doubleArray->SetName(colName.str().c_str());
49     doubleArray->SetNumberOfTuples(nVals);
50 
51     double x;
52     for (int r = 0; r < nVals; ++r)
53     {
54       // x = vtkMath::Gaussian();
55       x = vtkMath::Random();
56       doubleArray->SetValue(r, x);
57     }
58 
59     inputData->AddColumn(doubleArray);
60     doubleArray->Delete();
61   }
62 
63   vtkTable* paramData = vtkTable::New();
64   vtkIdTypeArray* paramCluster;
65   vtkDoubleArray* paramArray;
66   const int numRuns = 5;
67   const int numClustersInRun[] = { 5, 2, 3, 4, 5 };
68   paramCluster = vtkIdTypeArray::New();
69   paramCluster->SetName("K");
70 
71   for (int curRun = 0; curRun < numRuns; curRun++)
72   {
73     for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
74     {
75       paramCluster->InsertNextValue(numClustersInRun[curRun]);
76     }
77   }
78   paramData->AddColumn(paramCluster);
79   paramCluster->Delete();
80 
81   for (int c = 0; c < 5; ++c)
82   {
83     std::ostringstream colName;
84     colName << "coord " << c;
85     paramArray = vtkDoubleArray::New();
86     paramArray->SetNumberOfComponents(numComponents);
87     paramArray->SetName(colName.str().c_str());
88 
89     double x;
90     for (int curRun = 0; curRun < numRuns; curRun++)
91     {
92       for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
93       {
94         // x = vtkMath::Gaussian();
95         x = vtkMath::Random();
96         paramArray->InsertNextValue(x);
97       }
98     }
99     paramData->AddColumn(paramArray);
100     paramArray->Delete();
101   }
102 
103   // Set k-means statistics algorithm and its input data port
104   vtkKMeansStatistics* haruspex = vtkKMeansStatistics::New();
105 
106   // First verify that absence of input does not cause trouble
107   cout << "## Verifying that absence of input does not cause trouble... ";
108   haruspex->Update();
109   cout << "done.\n";
110 
111   // Prepare first test with data
112   haruspex->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, inputData);
113   haruspex->SetColumnStatus(inputData->GetColumnName(0), 1);
114   haruspex->SetColumnStatus(inputData->GetColumnName(2), 1);
115   haruspex->SetColumnStatus("Testing", 1);
116   haruspex->RequestSelectedColumns();
117   haruspex->SetDefaultNumberOfClusters(3);
118 
119   // Test Learn and Derive options
120   haruspex->SetLearnOption(true);
121   haruspex->SetDeriveOption(true);
122   haruspex->SetTestOption(false);
123   haruspex->SetAssessOption(false);
124   haruspex->Update();
125 
126   // Get output data and meta tables
127   vtkMultiBlockDataSet* outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
128     haruspex->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
129   for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
130   {
131     vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
132     if (!b)
133     {
134 
135       vtkIdType testIntValue = 0;
136       for (vtkIdType r = 0; r < outputMeta->GetNumberOfRows(); r++)
137       {
138         testIntValue += outputMeta->GetValueByName(r, "Cardinality").ToInt();
139       }
140 
141       cout << "## Computed clusters (cardinality: " << testIntValue << " / run):\n";
142 
143       if (testIntValue != nVals)
144       {
145         vtkGenericWarningMacro(
146           "Sum of cluster cardinalities is incorrect: " << testIntValue << " != " << nVals << ".");
147         testStatus = 1;
148       }
149     }
150     else
151     {
152       cout << "## Ranked cluster: "
153            << "\n";
154     }
155 
156     outputMeta->Dump();
157     cout << "\n";
158   }
159 
160   haruspex->SetInputData(vtkStatisticsAlgorithm::LEARN_PARAMETERS, paramData);
161   cout << "## Testing with input table:"
162        << "\n";
163 
164   paramData->Dump();
165   cout << "\n";
166 
167   // Test Assess option only
168   haruspex->SetLearnOption(true);
169   haruspex->SetDeriveOption(true);
170   haruspex->SetTestOption(false);
171   haruspex->SetAssessOption(false);
172 
173   haruspex->Update();
174   outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
175     haruspex->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
176   for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
177   {
178     vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
179     if (b == 0)
180     {
181       vtkIdType r = 0;
182       vtkIdType testIntValue = 0;
183       for (int curRun = 0; curRun < numRuns; curRun++)
184       {
185         testIntValue = 0;
186         for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
187         {
188           testIntValue += outputMeta->GetValueByName(r, "Cardinality").ToInt();
189           r++;
190         }
191       }
192 
193       if (r != outputMeta->GetNumberOfRows())
194       {
195         vtkGenericWarningMacro("Inconsistency in number of rows: "
196           << r << " != " << outputMeta->GetNumberOfRows() << ".");
197         testStatus = 1;
198       }
199 
200       cout << "## Computed clusters (cardinality: " << testIntValue << " / run):\n";
201 
202       if (testIntValue != nVals)
203       {
204         vtkGenericWarningMacro(
205           "Sum of cluster cardinalities is incorrect: " << testIntValue << " != " << nVals << ".");
206         testStatus = 1;
207       }
208     }
209     else
210     {
211       cout << "## Ranked cluster: "
212            << "\n";
213     }
214 
215     outputMeta->Dump();
216     cout << "\n";
217   }
218 
219   cout << "=================== ASSESS ==================== " << endl;
220   vtkMultiBlockDataSet* paramsTables = vtkMultiBlockDataSet::New();
221   paramsTables->ShallowCopy(outputMetaDS);
222 
223   haruspex->SetInputData(vtkStatisticsAlgorithm::INPUT_MODEL, paramsTables);
224 
225   // Test Assess option only (do not recalculate nor rederive a model)
226   haruspex->SetLearnOption(false);
227   haruspex->SetDeriveOption(false);
228   haruspex->SetTestOption(false);
229   haruspex->SetAssessOption(true);
230   haruspex->Update();
231   vtkTable* outputData = haruspex->GetOutput();
232   outputData->Dump();
233   paramsTables->Delete();
234   paramData->Delete();
235   inputData->Delete();
236   haruspex->Delete();
237 
238   return testStatus;
239 }
240