1
2 ///////////////////////////////////////////////////////////
3 // //
4 // SAGA //
5 // //
6 // System for Automated Geoscientific Analyses //
7 // //
8 // Tool Library //
9 // Table_Calculus //
10 // //
11 //-------------------------------------------------------//
12 // //
13 // table_cluster_analysis.cpp //
14 // //
15 // Copyright (C) 2010 by //
16 // Olaf Conrad //
17 // //
18 //-------------------------------------------------------//
19 // //
20 // This file is part of 'SAGA - System for Automated //
21 // Geoscientific Analyses'. SAGA is free software; you //
22 // can redistribute it and/or modify it under the terms //
23 // of the GNU General Public License as published by the //
24 // Free Software Foundation, either version 2 of the //
25 // License, or (at your option) any later version. //
26 // //
27 // SAGA is distributed in the hope that it will be //
28 // useful, but WITHOUT ANY WARRANTY; without even the //
29 // implied warranty of MERCHANTABILITY or FITNESS FOR A //
30 // PARTICULAR PURPOSE. See the GNU General Public //
31 // License for more details. //
32 // //
33 // You should have received a copy of the GNU General //
34 // Public License along with this program; if not, see //
35 // <http://www.gnu.org/licenses/>. //
36 // //
37 //-------------------------------------------------------//
38 // //
39 // e-mail: oconrad@saga-gis.org //
40 // //
41 // contact: Olaf Conrad //
42 // Institute of Geography //
43 // University of Hamburg //
44 // Germany //
45 // //
46 ///////////////////////////////////////////////////////////
47
48 //---------------------------------------------------------
49 #include "table_cluster_analysis.h"
50
51
52 ///////////////////////////////////////////////////////////
53 // //
54 // //
55 // //
56 ///////////////////////////////////////////////////////////
57
58 //---------------------------------------------------------
CTable_Cluster_Analysis(bool bShapes)59 CTable_Cluster_Analysis::CTable_Cluster_Analysis(bool bShapes)
60 {
61 Set_Author ("O. Conrad (c) 2010");
62
63 Set_Description (_TW(
64 "Cluster Analysis for tables."
65 ));
66
67 Add_Reference("Forgy, E.", "1965",
68 "Cluster Analysis of multivariate data: efficiency vs. interpretability of classifications",
69 "Biometrics 21:768."
70 );
71
72 Add_Reference("Rubin, J.", "1967",
73 "Optimal Classification into Groups: An Approach for Solving the Taxonomy Problem",
74 "J. Theoretical Biology, 15:103-144."
75 );
76
77 //-----------------------------------------------------
78 if( (m_bShapes = bShapes) == true )
79 {
80 Set_Name (_TL("Cluster Analysis (Shapes)"));
81
82 Parameters.Add_Shapes("", "INPUT" , _TL("Shapes"), _TL(""), PARAMETER_INPUT);
83 Parameters.Add_Shapes("", "RESULT", _TL("Result"), _TL(""), PARAMETER_OUTPUT_OPTIONAL);
84 }
85 else
86 {
87 Set_Name (_TL("Cluster Analysis"));
88
89 Parameters.Add_Table("", "INPUT" , _TL("Table" ), _TL(""), PARAMETER_INPUT);
90 Parameters.Add_Table("", "RESULT" , _TL("Result"), _TL(""), PARAMETER_OUTPUT_OPTIONAL);
91 }
92
93 Parameters.Add_Table_Fields("INPUT",
94 "FIELDS" , _TL("Attributes"),
95 _TL("")
96 );
97
98 Parameters.Add_Table_Field("INPUT",
99 "CLUSTER" , _TL("Cluster"),
100 _TL(""),
101 true
102 );
103
104 Parameters.Add_Table("",
105 "STATISTICS" , _TL("Statistics"),
106 _TL(""),
107 PARAMETER_OUTPUT
108 );
109
110 Parameters.Add_Choice("",
111 "METHOD" , _TL("Method"),
112 _TL(""),
113 CSG_String::Format("%s|%s|%s",
114 _TL("Iterative Minimum Distance (Forgy 1965)"),
115 _TL("Hill-Climbing (Rubin 1967)"),
116 _TL("Combined Minimum Distance / Hillclimbing")
117 ), 1
118 );
119
120 Parameters.Add_Int("",
121 "NCLUSTER" , _TL("Number of Clusters"),
122 _TL(""),
123 10, 2, true
124 );
125
126 Parameters.Add_Bool("",
127 "NORMALISE" , _TL("Normalise"),
128 _TL(""),
129 false
130 );
131 }
132
133
134 ///////////////////////////////////////////////////////////
135 // //
136 ///////////////////////////////////////////////////////////
137
138 //---------------------------------------------------------
On_Execute(void)139 bool CTable_Cluster_Analysis::On_Execute(void)
140 {
141 bool bNormalize;
142 int iFeature, nFeatures, *Features, iElement, nElements, Cluster;
143 CSG_Cluster_Analysis Analysis;
144 CSG_Table *pTable;
145
146 //-----------------------------------------------------
147 pTable = Parameters("RESULT" )->asTable();
148 bNormalize = Parameters("NORMALISE")->asBool();
149 Cluster = Parameters("CLUSTER" )->asInt();
150
151 Features = (int *)Parameters("FIELDS")->asPointer();
152 nFeatures = Parameters("FIELDS")->asInt ();
153
154 if( !Features || nFeatures <= 0 )
155 {
156 Error_Set(_TL("no features in selection"));
157
158 return( false );
159 }
160
161 if( !Analysis.Create(nFeatures) )
162 {
163 Error_Set(_TL("could not initialize cluster engine"));
164
165 return( false );
166 }
167
168 if( pTable && pTable != Parameters("INPUT")->asTable() )
169 {
170 if( m_bShapes )
171 {
172 ((CSG_Shapes *)pTable)->Create(*Parameters("INPUT")->asShapes());
173 }
174 else
175 {
176 pTable->Create(*Parameters("INPUT")->asTable());
177 }
178 }
179 else
180 {
181 pTable = Parameters("INPUT")->asTable();
182 }
183
184 if( Cluster < 0 )
185 {
186 Cluster = pTable->Get_Field_Count();
187
188 pTable->Add_Field(_TL("CLUSTER"), SG_DATATYPE_Int);
189 }
190
191 //-----------------------------------------------------
192 for(iElement=0, nElements=0; iElement<pTable->Get_Count() && Set_Progress(iElement, pTable->Get_Count()); iElement++)
193 {
194 CSG_Table_Record *pRecord = pTable->Get_Record(iElement);
195
196 bool bNoData = false;
197
198 for(iFeature=0; iFeature<nFeatures && !bNoData; iFeature++)
199 {
200 if( pRecord->is_NoData(Features[iFeature]) )
201 {
202 bNoData = true;
203 }
204 }
205
206 if( bNoData || !Analysis.Add_Element() )
207 {
208 pRecord->Set_NoData(Cluster);
209 }
210 else
211 {
212 pRecord->Set_Value(Cluster, 0.0);
213
214 for(iFeature=0; iFeature<nFeatures; iFeature++)
215 {
216 double d = pRecord->asDouble(Features[iFeature]);
217
218 if( bNormalize )
219 {
220 d = (d - pTable->Get_Mean(Features[iFeature])) / pTable->Get_StdDev(Features[iFeature]);
221 }
222
223 Analysis.Set_Feature(nElements, iFeature, d);
224 }
225
226 nElements++;
227 }
228 }
229
230 if( nElements <= 1 )
231 {
232 return( false );
233 }
234
235 //-----------------------------------------------------
236 bool bResult = Analysis.Execute(Parameters("METHOD")->asInt(), Parameters("NCLUSTER")->asInt());
237
238 for(iElement=0, nElements=0; iElement<pTable->Get_Count(); iElement++)
239 {
240 Set_Progress(iElement, pTable->Get_Count());
241
242 CSG_Table_Record *pRecord = pTable->Get_Record(iElement);
243
244 if( !pRecord->is_NoData(Cluster) )
245 {
246 pRecord->Set_Value(Cluster, Analysis.Get_Cluster(nElements++));
247 }
248 }
249
250 Save_Statistics(pTable, Features, bNormalize, Analysis);
251
252 // Save_LUT(pCluster, Analysis.Get_nClusters());
253
254 DataObject_Update(pTable);
255
256 return( bResult );
257 }
258
259
260 ///////////////////////////////////////////////////////////
261 // //
262 ///////////////////////////////////////////////////////////
263
264 //---------------------------------------------------------
Save_Statistics(CSG_Table * pTable,int * Features,bool bNormalize,const CSG_Cluster_Analysis & Analysis)265 void CTable_Cluster_Analysis::Save_Statistics(CSG_Table *pTable, int *Features, bool bNormalize, const CSG_Cluster_Analysis &Analysis)
266 {
267 int iCluster, iFeature;
268 CSG_String s;
269 CSG_Table *pStatistics;
270
271 pStatistics = Parameters("STATISTICS")->asTable();
272
273 pStatistics->Destroy();
274 pStatistics->Set_Name(_TL("Cluster Analysis"));
275
276 pStatistics->Add_Field(_TL("ClusterID") , SG_DATATYPE_Int);
277 pStatistics->Add_Field(_TL("Elements") , SG_DATATYPE_Int);
278 pStatistics->Add_Field(_TL("Std.Dev.") , SG_DATATYPE_Double);
279
280 s.Printf("\n%s:\t%ld \n%s:\t%d \n%s:\t%d \n%s:\t%f\n\n%s\t%s\t%s",
281 _TL("Number of Elements") , Analysis.Get_nElements(),
282 _TL("Number of Variables") , Analysis.Get_nFeatures(),
283 _TL("Number of Clusters") , Analysis.Get_nClusters(),
284 _TL("Value of Target Function") , Analysis.Get_SP(),
285 _TL("Cluster"), _TL("Elements"), _TL("Std.Dev.")
286 );
287
288 for(iFeature=0; iFeature<Analysis.Get_nFeatures(); iFeature++)
289 {
290 s += CSG_String::Format("\t%s", pTable->Get_Field_Name(Features[iFeature]));
291
292 pStatistics->Add_Field(pTable->Get_Field_Name(Features[iFeature]), SG_DATATYPE_Double);
293 }
294
295 Message_Add(s);
296
297 for(iCluster=0; iCluster<Analysis.Get_nClusters(); iCluster++)
298 {
299 s.Printf("\n%d\t%d\t%f", iCluster, Analysis.Get_nMembers(iCluster), sqrt(Analysis.Get_Variance(iCluster)));
300
301 CSG_Table_Record *pRecord = pStatistics->Add_Record();
302
303 pRecord->Set_Value(0, iCluster);
304 pRecord->Set_Value(1, Analysis.Get_nMembers(iCluster));
305 pRecord->Set_Value(2, sqrt(Analysis.Get_Variance(iCluster)));
306
307 for(iFeature=0; iFeature<Analysis.Get_nFeatures(); iFeature++)
308 {
309 double Centroid = Analysis.Get_Centroid(iCluster, iFeature);
310
311 if( bNormalize )
312 {
313 Centroid = pTable->Get_Mean(Features[iFeature]) + Centroid * pTable->Get_StdDev(Features[iFeature]);
314 }
315
316 s += CSG_String::Format("\t%f", Centroid);
317
318 pRecord->Set_Value(iFeature + 3, Centroid);
319 }
320
321 Message_Add(s, false);
322 }
323 }
324
325
326 ///////////////////////////////////////////////////////////
327 // //
328 // //
329 // //
330 ///////////////////////////////////////////////////////////
331
332 //---------------------------------------------------------
333