1 /*
2 * Copyright 2008 Sandia Corporation.
3 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
4 * license for use of this work by or on behalf of the
5 * U.S. Government. Redistribution and use in source and binary forms, with
6 * or without modification, are permitted provided that this Notice and any
7 * statement of authorship are reproduced on all copies.
8 */
9 // .SECTION Thanks
10 // Thanks to Janine Bennett, Philippe Pebay, and David Thompson from Sandia National Laboratories
11 // for implementing this test.
12
13 #include "vtkDoubleArray.h"
14 #include "vtkIdTypeArray.h"
15 #include "vtkKMeansStatistics.h"
16 #include "vtkMath.h"
17 #include "vtkMultiBlockDataSet.h"
18 #include "vtkStdString.h"
19 #include "vtkStringArray.h"
20 #include "vtkTable.h"
21 #include "vtkTimerLog.h"
22
23 #include <sstream>
24
25 //=============================================================================
TestKMeansStatistics(int,char * [])26 int TestKMeansStatistics(int, char*[])
27 {
28 int testStatus = 0;
29
30 const int nDim = 4;
31 int nVals = 50;
32
33 // Seed random number generator
34 vtkMath::RandomSeed(static_cast<int>(vtkTimerLog::GetUniversalTime()));
35
36 // Generate an input table that contains samples of mutually independent random variables over [0,
37 // 1]
38 vtkTable* inputData = vtkTable::New();
39 vtkDoubleArray* doubleArray;
40
41 int numComponents = 1;
42 for (int c = 0; c < nDim; ++c)
43 {
44 std::ostringstream colName;
45 colName << "coord " << c;
46 doubleArray = vtkDoubleArray::New();
47 doubleArray->SetNumberOfComponents(numComponents);
48 doubleArray->SetName(colName.str().c_str());
49 doubleArray->SetNumberOfTuples(nVals);
50
51 double x;
52 for (int r = 0; r < nVals; ++r)
53 {
54 // x = vtkMath::Gaussian();
55 x = vtkMath::Random();
56 doubleArray->SetValue(r, x);
57 }
58
59 inputData->AddColumn(doubleArray);
60 doubleArray->Delete();
61 }
62
63 vtkTable* paramData = vtkTable::New();
64 vtkIdTypeArray* paramCluster;
65 vtkDoubleArray* paramArray;
66 const int numRuns = 5;
67 const int numClustersInRun[] = { 5, 2, 3, 4, 5 };
68 paramCluster = vtkIdTypeArray::New();
69 paramCluster->SetName("K");
70
71 for (int curRun = 0; curRun < numRuns; curRun++)
72 {
73 for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
74 {
75 paramCluster->InsertNextValue(numClustersInRun[curRun]);
76 }
77 }
78 paramData->AddColumn(paramCluster);
79 paramCluster->Delete();
80
81 for (int c = 0; c < 5; ++c)
82 {
83 std::ostringstream colName;
84 colName << "coord " << c;
85 paramArray = vtkDoubleArray::New();
86 paramArray->SetNumberOfComponents(numComponents);
87 paramArray->SetName(colName.str().c_str());
88
89 double x;
90 for (int curRun = 0; curRun < numRuns; curRun++)
91 {
92 for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
93 {
94 // x = vtkMath::Gaussian();
95 x = vtkMath::Random();
96 paramArray->InsertNextValue(x);
97 }
98 }
99 paramData->AddColumn(paramArray);
100 paramArray->Delete();
101 }
102
103 // Set k-means statistics algorithm and its input data port
104 vtkKMeansStatistics* haruspex = vtkKMeansStatistics::New();
105
106 // First verify that absence of input does not cause trouble
107 cout << "## Verifying that absence of input does not cause trouble... ";
108 haruspex->Update();
109 cout << "done.\n";
110
111 // Prepare first test with data
112 haruspex->SetInputData(vtkStatisticsAlgorithm::INPUT_DATA, inputData);
113 haruspex->SetColumnStatus(inputData->GetColumnName(0), 1);
114 haruspex->SetColumnStatus(inputData->GetColumnName(2), 1);
115 haruspex->SetColumnStatus("Testing", 1);
116 haruspex->RequestSelectedColumns();
117 haruspex->SetDefaultNumberOfClusters(3);
118
119 // Test Learn and Derive options
120 haruspex->SetLearnOption(true);
121 haruspex->SetDeriveOption(true);
122 haruspex->SetTestOption(false);
123 haruspex->SetAssessOption(false);
124 haruspex->Update();
125
126 // Get output data and meta tables
127 vtkMultiBlockDataSet* outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
128 haruspex->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
129 for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
130 {
131 vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
132 if (!b)
133 {
134
135 vtkIdType testIntValue = 0;
136 for (vtkIdType r = 0; r < outputMeta->GetNumberOfRows(); r++)
137 {
138 testIntValue += outputMeta->GetValueByName(r, "Cardinality").ToInt();
139 }
140
141 cout << "## Computed clusters (cardinality: " << testIntValue << " / run):\n";
142
143 if (testIntValue != nVals)
144 {
145 vtkGenericWarningMacro(
146 "Sum of cluster cardinalities is incorrect: " << testIntValue << " != " << nVals << ".");
147 testStatus = 1;
148 }
149 }
150 else
151 {
152 cout << "## Ranked cluster: "
153 << "\n";
154 }
155
156 outputMeta->Dump();
157 cout << "\n";
158 }
159
160 haruspex->SetInputData(vtkStatisticsAlgorithm::LEARN_PARAMETERS, paramData);
161 cout << "## Testing with input table:"
162 << "\n";
163
164 paramData->Dump();
165 cout << "\n";
166
167 // Test Assess option only
168 haruspex->SetLearnOption(true);
169 haruspex->SetDeriveOption(true);
170 haruspex->SetTestOption(false);
171 haruspex->SetAssessOption(false);
172
173 haruspex->Update();
174 outputMetaDS = vtkMultiBlockDataSet::SafeDownCast(
175 haruspex->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
176 for (unsigned int b = 0; b < outputMetaDS->GetNumberOfBlocks(); ++b)
177 {
178 vtkTable* outputMeta = vtkTable::SafeDownCast(outputMetaDS->GetBlock(b));
179 if (b == 0)
180 {
181 vtkIdType r = 0;
182 vtkIdType testIntValue = 0;
183 for (int curRun = 0; curRun < numRuns; curRun++)
184 {
185 testIntValue = 0;
186 for (int nInRun = 0; nInRun < numClustersInRun[curRun]; nInRun++)
187 {
188 testIntValue += outputMeta->GetValueByName(r, "Cardinality").ToInt();
189 r++;
190 }
191 }
192
193 if (r != outputMeta->GetNumberOfRows())
194 {
195 vtkGenericWarningMacro("Inconsistency in number of rows: "
196 << r << " != " << outputMeta->GetNumberOfRows() << ".");
197 testStatus = 1;
198 }
199
200 cout << "## Computed clusters (cardinality: " << testIntValue << " / run):\n";
201
202 if (testIntValue != nVals)
203 {
204 vtkGenericWarningMacro(
205 "Sum of cluster cardinalities is incorrect: " << testIntValue << " != " << nVals << ".");
206 testStatus = 1;
207 }
208 }
209 else
210 {
211 cout << "## Ranked cluster: "
212 << "\n";
213 }
214
215 outputMeta->Dump();
216 cout << "\n";
217 }
218
219 cout << "=================== ASSESS ==================== " << endl;
220 vtkMultiBlockDataSet* paramsTables = vtkMultiBlockDataSet::New();
221 paramsTables->ShallowCopy(outputMetaDS);
222
223 haruspex->SetInputData(vtkStatisticsAlgorithm::INPUT_MODEL, paramsTables);
224
225 // Test Assess option only (do not recalculate nor rederive a model)
226 haruspex->SetLearnOption(false);
227 haruspex->SetDeriveOption(false);
228 haruspex->SetTestOption(false);
229 haruspex->SetAssessOption(true);
230 haruspex->Update();
231 vtkTable* outputData = haruspex->GetOutput();
232 outputData->Dump();
233 paramsTables->Delete();
234 paramData->Delete();
235 inputData->Delete();
236 haruspex->Delete();
237
238 return testStatus;
239 }
240