1 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
2
3 #include <grass/glocale.h>
4 #include <grass/arraystats.h>
5
AS_option_to_algorithm(const struct Option * option)6 int AS_option_to_algorithm(const struct Option * option)
7 {
8 if (G_strcasecmp(option->answer, "int") == 0)
9 return CLASS_INTERVAL;
10 if (G_strcasecmp(option->answer, "std") == 0)
11 return CLASS_STDEV;
12 if (G_strcasecmp(option->answer, "qua") == 0)
13 return CLASS_QUANT;
14 if (G_strcasecmp(option->answer, "equ") == 0)
15 return CLASS_EQUIPROB;
16 if (G_strcasecmp(option->answer, "dis") == 0)
17 return CLASS_DISCONT;
18
19 G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
20 }
21
AS_class_apply_algorithm(int algo,double * data,int nrec,int * nbreaks,double * classbreaks)22 double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
23 double *classbreaks)
24 {
25 double finfo = 0.0;
26
27 switch (algo) {
28 case CLASS_INTERVAL:
29 finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
30 break;
31 case CLASS_STDEV:
32 finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
33 break;
34 case CLASS_QUANT:
35 finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
36 break;
37 case CLASS_EQUIPROB:
38 finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
39 break;
40 case CLASS_DISCONT:
41 /* finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
42 G_fatal_error(_("Discont algorithm currently not available because of bugs"));
43 break;
44 default:
45 break;
46 }
47
48 if (finfo == 0)
49 G_fatal_error(_("Classification algorithm failed"));
50
51 return finfo;
52 }
53
AS_class_interval(double * data,int count,int nbreaks,double * classbreaks)54 int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
55 {
56 double min, max;
57 double step;
58 int i = 0;
59
60 min = data[0];
61 max = data[count - 1];
62
63 step = (max - min) / (nbreaks + 1);
64
65 for (i = 0; i < nbreaks; i++)
66 classbreaks[i] = min + (step * (i + 1));
67
68 return (1);
69 }
70
AS_class_stdev(double * data,int count,int nbreaks,double * classbreaks)71 double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
72 {
73 struct GASTATS stats;
74 int i;
75 int nbclass;
76 double scale = 1.0;
77
78 AS_basic_stats(data, count, &stats);
79
80 nbclass = nbreaks + 1;
81
82 if (nbclass % 2 == 1) { /* number of classes is uneven so we center middle class on mean */
83
84 /* find appropriate fraction of stdev for step */
85 i = 1;
86 while (i) {
87 if (((stats.mean + stats.stdev * scale / 2) +
88 (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
89 ((stats.mean - stats.stdev * scale / 2) -
90 (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
91 scale = scale / 2;
92 else
93 i = 0;
94 }
95
96 /* classbreaks below the mean */
97 for (i = 0; i < nbreaks / 2; i++)
98 classbreaks[i] =
99 (stats.mean - stats.stdev * scale / 2) -
100 stats.stdev * scale * (nbreaks / 2 - (i + 1));
101 /* classbreaks above the mean */
102 for (i = i; i < nbreaks; i++)
103 classbreaks[i] =
104 (stats.mean + stats.stdev * scale / 2) +
105 stats.stdev * scale * (i - nbreaks / 2);
106
107 }
108 else { /* number of classes is even so mean is a classbreak */
109
110 /* decide whether to use 1*stdev or 0.5*stdev as step */
111 i = 1;
112 while (i) {
113 if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
114 stats.max) ||
115 ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
116 stats.min))
117 scale = scale / 2;
118 else
119 i = 0;
120 }
121
122 /* classbreaks below the mean and on the mean */
123 for (i = 0; i <= nbreaks / 2; i++)
124 classbreaks[i] =
125 stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
126 /* classbreaks above the mean */
127 for (i = i; i < nbreaks; i++)
128 classbreaks[i] =
129 stats.mean + stats.stdev * scale * (i - nbreaks / 2);
130 }
131
132 return (scale);
133 }
134
AS_class_quant(double * data,int count,int nbreaks,double * classbreaks)135 int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
136 {
137 int i, step;
138
139 step = count / (nbreaks + 1);
140
141 for (i = 0; i < nbreaks; i++)
142 classbreaks[i] = data[step * (i + 1)];
143
144 return (1);
145 }
146
147
AS_class_equiprob(double * data,int count,int * nbreaks,double * classbreaks)148 int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
149 {
150 int i, j;
151 double *lequi; /*Vector of scale factors for probabilities of the normal distribution */
152 struct GASTATS stats;
153 int nbclass;
154
155 nbclass = *nbreaks + 1;
156
157 lequi = G_malloc(*nbreaks * sizeof(double));
158
159 /* The following values come from the normal distribution and will be used as:
160 * classbreak[i] = (lequi[i] * stdev) + mean;
161 */
162
163 if (nbclass < 3) {
164 lequi[0] = 0;
165 }
166 else if (nbclass == 3) {
167 lequi[0] = -0.43076;
168 lequi[1] = 0.43076;
169 }
170 else if (nbclass == 4) {
171 lequi[0] = -0.6745;
172 lequi[1] = 0;
173 lequi[2] = 0.6745;
174 }
175 else if (nbclass == 5) {
176 lequi[0] = -0.8416;
177 lequi[1] = -0.2533;
178 lequi[2] = 0.2533;
179 lequi[3] = 0.8416;
180 }
181 else if (nbclass == 6) {
182 lequi[0] = -0.9676;
183 lequi[1] = -0.43076;
184 lequi[2] = 0;
185 lequi[3] = 0.43076;
186 lequi[4] = 0.9676;
187 }
188 else if (nbclass == 7) {
189 lequi[0] = -1.068;
190 lequi[1] = -0.566;
191 lequi[2] = -0.18;
192 lequi[3] = 0.18;
193 lequi[4] = 0.566;
194 lequi[5] = 1.068;
195 }
196 else if (nbclass == 8) {
197 lequi[0] = -1.1507;
198 lequi[1] = -0.6745;
199 lequi[2] = -0.3187;
200 lequi[3] = 0;
201 lequi[4] = 0.3187;
202 lequi[5] = 0.6745;
203 lequi[6] = 1.1507;
204 }
205 else if (nbclass == 9) {
206 lequi[0] = -1.2208;
207 lequi[1] = -0.7648;
208 lequi[2] = -0.4385;
209 lequi[3] = -0.1397;
210 lequi[4] = 0.1397;
211 lequi[5] = 0.4385;
212 lequi[6] = 0.7648;
213 lequi[7] = 1.2208;
214 }
215 else if (nbclass == 10) {
216 lequi[0] = -1.28155;
217 lequi[1] = -0.84162;
218 lequi[2] = -0.5244;
219 lequi[3] = -0.25335;
220 lequi[4] = 0;
221 lequi[5] = 0.25335;
222 lequi[6] = 0.5244;
223 lequi[7] = 0.84162;
224 lequi[8] = 1.28155;
225 }
226 else {
227 G_fatal_error(_("Equiprobable classbreaks currently limited to 10 classes"));
228 }
229
230 AS_basic_stats(data, count, &stats);
231
232 /* Check if any of the classbreaks would fall outside of the range min-max */
233 j = 0;
234 for (i = 0; i < *nbreaks; i++) {
235 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
236 (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
237 j++;
238 }
239 }
240
241 if (j < (*nbreaks)) {
242 G_warning(_("There are classbreaks outside the range min-max. Number of "
243 "classes reduced to %i, but using probabilities for %i classes."),
244 j + 1, *nbreaks + 1);
245 G_realloc(classbreaks, j * sizeof(double));
246 for (i = 0; i < j; i++)
247 classbreaks[i] = 0;
248 }
249
250 j = 0;
251 for (i = 0; i < *nbreaks; i++) {
252 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
253 (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
254 classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
255 j++;
256 }
257 }
258
259 *nbreaks = j;
260
261 G_free(lequi);
262 return (1);
263 }
264
265 /* FIXME: there seems to a problem with array overflow, probably due to
266 the fact that the code was ported from fortran which has 1-based arrays*/
AS_class_discont(double * data,int count,int nbreaks,double * classbreaks)267 double AS_class_discont(double *data, int count, int nbreaks,
268 double *classbreaks)
269 {
270 int *num, nbclass;
271 double *no, *zz, *nz, *xn, *co;
272 double *x; /* Vector standardized observations */
273 int i, j, k;
274 double min = 0, max = 0, rangemax = 0;
275 int n = 0;
276 double rangemin = 0, xlim = 0;
277 double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
278 int nf = 0, nmax = 0;
279 double *abc;
280 int nd = 0;
281 double den = 0, d = 0;
282 int im = 0, ji = 0;
283 int tmp = 0;
284 int nff = 0, jj = 0, no1 = 0, no2 = 0;
285 double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
286
287
288 /*get the number of values */
289 n = count;
290
291 nbclass = nbreaks + 1;
292
293 num = G_malloc((nbclass + 1) * sizeof(int));
294 no = G_malloc((nbclass + 1) * sizeof(double));
295 zz = G_malloc((nbclass + 1) * sizeof(double));
296 nz = G_malloc(3 * sizeof(double));
297 xn = G_malloc((n + 1) * sizeof(double));
298 co = G_malloc((nbclass + 1) * sizeof(double));
299
300 /* We copy the array of values to x, in order to be able to standardize it */
301 x = G_malloc((n + 1) * sizeof(double));
302 x[0] = n;
303 xn[0] = 0;
304
305 min = data[0];
306 max = data[count - 1];
307 for (i = 1; i <= n; i++)
308 x[i] = data[i - 1];
309
310 rangemax = max - min;
311 rangemin = rangemax;
312
313 for (i = 2; i <= n; i++) {
314 if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
315 rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
316 }
317
318 /* STANDARDIZATION
319 * and creation of the number vector (xn) */
320
321 for (i = 1; i <= n; i++) {
322 x[i] = (x[i] - min) / rangemax;
323 xn[i] = i / (double)n;
324 }
325 xlim = rangemin / rangemax;
326 rangemin = rangemin / 2.0;
327
328 /* Searching for the limits */
329 num[1] = n;
330 abc = G_malloc(3 * sizeof(double));
331
332 /* Loop through possible solutions */
333 for (i = 1; i <= nbclass; i++) {
334 nmax = 0;
335 dmax = 0.0;
336 d2 = 0.0;
337 nf = 0; /*End number */
338
339 /* Loop through classes */
340 for (j = 1; j <= i; j++) {
341 nd = nf; /*Start number */
342 nf = num[j];
343 co[j] = 10e37;
344 AS_eqdrt(x, xn, nd, nf, abc);
345 den = sqrt(pow(abc[1], 2) + 1.0);
346 nd++;
347 /* Loop through observations */
348 for (k = nd; k <= nf; k++) {
349 if (abc[2] == 0.0)
350 d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
351 else
352 d = fabs(x[k] - abc[2]);
353 d2 += pow(d, 2);
354 if (x[k] - x[nd] < xlim)
355 continue;
356 if (x[nf] - x[k] < xlim)
357 continue;
358 if (d <= dmax)
359 continue;
360 dmax = d;
361 nmax = k;
362 }
363 nd--; /* A VERIFIER! */
364 if (x[nf] != x[nd]) {
365 if (nd != 0)
366 co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
367 else
368 co[j] = (xn[nf]) / (x[nf]); /* A VERIFIER! */
369 }
370 }
371 if (i == 1)
372 dd = d2;
373 p = d2 / dd;
374 for (j = 1; j <= i; j++) {
375 no[j] = num[j];
376 zz[j] = x[num[j]] * rangemax + min;
377 if (j == i)
378 continue;
379 if (co[j] > co[j + 1]) {
380 zz[j] = zz[j] + rangemin;
381 continue;
382 }
383 zz[j] = zz[j] - rangemin;
384 no[j] = no[j] - 1;
385 }
386 im = i - 1;
387 if (im != 0.0) {
388 for (j = 1; j <= im; j++) {
389 ji = i + 1 - j;
390 no[ji] -= no[ji - 1];
391 }
392 }
393 if (nmax == 0) {
394 break;
395 }
396 nff = i + 2;
397 tmp = 0;
398 for (j = 1; j <= i; j++) {
399 jj = nff - j;
400 if (num[jj - 1] < nmax) {
401 num[jj] = nmax;
402 tmp = 1;
403 break;
404 }
405 num[jj] = num[jj - 1];
406 }
407 if (tmp == 0) {
408 num[1] = nmax;
409 jj = 1;
410 }
411 if (jj == 1) {
412 xnj_1 = 0;
413 xj_1 = 0;
414 }
415 else {
416 xnj_1 = xn[num[jj - 1]];
417 xj_1 = x[num[jj - 1]];
418 }
419 no1 = (xn[num[jj]] - xnj_1) * n;
420 no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
421 f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
422 f *= n;
423 xt1 = (x[num[jj]] - xj_1) * f;
424 xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
425 if (xt2 == 0) {
426 xt2 = rangemin / 2.0 / rangemax * f;
427 xt1 -= xt2;
428 }
429 else if (xt1 * xt2 == 0) {
430 xt1 = rangemin / 2.0 / rangemax * f;
431 xt2 -= xt1;
432 }
433
434 /* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
435 if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
436 chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
437
438 }
439
440 /* Fill up classbreaks of i <=nbclass classes */
441 for (j = 0; j <= (i - 1); j++)
442 classbreaks[j] = zz[j + 1];
443
444 return (chi2);
445 }
446
AS_class_frequencies(double * data,int count,int nbreaks,double * classbreaks,int * frequencies)447 int AS_class_frequencies(double *data, int count, int nbreaks,
448 double *classbreaks, int *frequencies)
449 {
450 int i, j;
451 double min, max;
452
453 min = data[0];
454 max = data[count - 1];
455 /* count cases in all classes, except for last class */
456 i = 0;
457 for (j = 0; j < nbreaks; j++) {
458 while (data[i] <= classbreaks[j]) {
459 frequencies[j]++;
460 i++;
461 }
462 }
463
464 /*Now count cases in last class */
465 for (i = i; i < count; i++) {
466 frequencies[nbreaks]++;
467 }
468
469 return (1);
470 }
471