1 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
2 
3 #include <grass/glocale.h>
4 #include <grass/arraystats.h>
5 
AS_option_to_algorithm(const struct Option * option)6 int AS_option_to_algorithm(const struct Option * option)
7 {
8     if (G_strcasecmp(option->answer, "int") == 0)
9         return CLASS_INTERVAL;
10     if (G_strcasecmp(option->answer, "std") == 0)
11         return CLASS_STDEV;
12     if (G_strcasecmp(option->answer, "qua") == 0)
13         return CLASS_QUANT;
14     if (G_strcasecmp(option->answer, "equ") == 0)
15         return CLASS_EQUIPROB;
16     if (G_strcasecmp(option->answer, "dis") == 0)
17         return CLASS_DISCONT;
18 
19     G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
20 }
21 
AS_class_apply_algorithm(int algo,double * data,int nrec,int * nbreaks,double * classbreaks)22 double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
23                                 double *classbreaks)
24 {
25     double finfo = 0.0;
26 
27     switch (algo) {
28     case CLASS_INTERVAL:
29         finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
30         break;
31     case CLASS_STDEV:
32         finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
33         break;
34     case CLASS_QUANT:
35         finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
36         break;
37     case CLASS_EQUIPROB:
38         finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
39         break;
40     case CLASS_DISCONT:
41         /*	finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
42         G_fatal_error(_("Discont algorithm currently not available because of bugs"));
43         break;
44     default:
45         break;
46     }
47 
48     if (finfo == 0)
49 	G_fatal_error(_("Classification algorithm failed"));
50 
51     return finfo;
52 }
53 
AS_class_interval(double * data,int count,int nbreaks,double * classbreaks)54 int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
55 {
56     double min, max;
57     double step;
58     int i = 0;
59 
60     min = data[0];
61     max = data[count - 1];
62 
63     step = (max - min) / (nbreaks + 1);
64 
65     for (i = 0; i < nbreaks; i++)
66 	classbreaks[i] = min + (step * (i + 1));
67 
68     return (1);
69 }
70 
AS_class_stdev(double * data,int count,int nbreaks,double * classbreaks)71 double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
72 {
73     struct GASTATS stats;
74     int i;
75     int nbclass;
76     double scale = 1.0;
77 
78     AS_basic_stats(data, count, &stats);
79 
80     nbclass = nbreaks + 1;
81 
82     if (nbclass % 2 == 1) {	/* number of classes is uneven so we center middle class on mean */
83 
84 	/* find appropriate fraction of stdev for step */
85 	i = 1;
86 	while (i) {
87 	    if (((stats.mean + stats.stdev * scale / 2) +
88 		 (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
89 		((stats.mean - stats.stdev * scale / 2) -
90 		 (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
91 		scale = scale / 2;
92 	    else
93 		i = 0;
94 	}
95 
96 	/* classbreaks below the mean */
97 	for (i = 0; i < nbreaks / 2; i++)
98 	    classbreaks[i] =
99 		(stats.mean - stats.stdev * scale / 2) -
100 		stats.stdev * scale * (nbreaks / 2 - (i + 1));
101 	/* classbreaks above the mean */
102 	for (i = i; i < nbreaks; i++)
103 	    classbreaks[i] =
104 		(stats.mean + stats.stdev * scale / 2) +
105 		stats.stdev * scale * (i - nbreaks / 2);
106 
107     }
108     else {		/* number of classes is even so mean is a classbreak */
109 
110 	/* decide whether to use 1*stdev or 0.5*stdev as step */
111 	i = 1;
112 	while (i) {
113 	    if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
114 		 stats.max) ||
115 		((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
116 		 stats.min))
117 		scale = scale / 2;
118 	    else
119 		i = 0;
120 	}
121 
122 	/* classbreaks below the mean and on the mean */
123 	for (i = 0; i <= nbreaks / 2; i++)
124 	    classbreaks[i] =
125 		stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
126 	/* classbreaks above the mean */
127 	for (i = i; i < nbreaks; i++)
128 	    classbreaks[i] =
129 		stats.mean + stats.stdev * scale * (i - nbreaks / 2);
130     }
131 
132     return (scale);
133 }
134 
AS_class_quant(double * data,int count,int nbreaks,double * classbreaks)135 int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
136 {
137     int i, step;
138 
139     step = count / (nbreaks + 1);
140 
141     for (i = 0; i < nbreaks; i++)
142 	classbreaks[i] = data[step * (i + 1)];
143 
144     return (1);
145 }
146 
147 
AS_class_equiprob(double * data,int count,int * nbreaks,double * classbreaks)148 int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
149 {
150     int i, j;
151     double *lequi;		/*Vector of scale factors for probabilities of the normal distribution */
152     struct GASTATS stats;
153     int nbclass;
154 
155     nbclass = *nbreaks + 1;
156 
157     lequi = G_malloc(*nbreaks * sizeof(double));
158 
159     /* The following values come from the normal distribution and will be used as:
160      * classbreak[i] = (lequi[i] * stdev) + mean;
161      */
162 
163     if (nbclass < 3) {
164 	lequi[0] = 0;
165     }
166     else if (nbclass == 3) {
167 	lequi[0] = -0.43076;
168 	lequi[1] = 0.43076;
169     }
170     else if (nbclass == 4) {
171 	lequi[0] = -0.6745;
172 	lequi[1] = 0;
173 	lequi[2] = 0.6745;
174     }
175     else if (nbclass == 5) {
176 	lequi[0] = -0.8416;
177 	lequi[1] = -0.2533;
178 	lequi[2] = 0.2533;
179 	lequi[3] = 0.8416;
180     }
181     else if (nbclass == 6) {
182 	lequi[0] = -0.9676;
183 	lequi[1] = -0.43076;
184 	lequi[2] = 0;
185 	lequi[3] = 0.43076;
186 	lequi[4] = 0.9676;
187     }
188     else if (nbclass == 7) {
189 	lequi[0] = -1.068;
190 	lequi[1] = -0.566;
191 	lequi[2] = -0.18;
192 	lequi[3] = 0.18;
193 	lequi[4] = 0.566;
194 	lequi[5] = 1.068;
195     }
196     else if (nbclass == 8) {
197 	lequi[0] = -1.1507;
198 	lequi[1] = -0.6745;
199 	lequi[2] = -0.3187;
200 	lequi[3] = 0;
201 	lequi[4] = 0.3187;
202 	lequi[5] = 0.6745;
203 	lequi[6] = 1.1507;
204     }
205     else if (nbclass == 9) {
206 	lequi[0] = -1.2208;
207 	lequi[1] = -0.7648;
208 	lequi[2] = -0.4385;
209 	lequi[3] = -0.1397;
210 	lequi[4] = 0.1397;
211 	lequi[5] = 0.4385;
212 	lequi[6] = 0.7648;
213 	lequi[7] = 1.2208;
214     }
215     else if (nbclass == 10) {
216 	lequi[0] = -1.28155;
217 	lequi[1] = -0.84162;
218 	lequi[2] = -0.5244;
219 	lequi[3] = -0.25335;
220 	lequi[4] = 0;
221 	lequi[5] = 0.25335;
222 	lequi[6] = 0.5244;
223 	lequi[7] = 0.84162;
224 	lequi[8] = 1.28155;
225     }
226     else {
227 	G_fatal_error(_("Equiprobable classbreaks currently limited to 10 classes"));
228     }
229 
230     AS_basic_stats(data, count, &stats);
231 
232     /* Check if any of the classbreaks would fall outside of the range min-max */
233     j = 0;
234     for (i = 0; i < *nbreaks; i++) {
235 	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
236 	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
237 	    j++;
238 	}
239     }
240 
241     if (j < (*nbreaks)) {
242 	G_warning(_("There are classbreaks outside the range min-max. Number of "
243 		    "classes reduced to %i, but using probabilities for %i classes."),
244 		  j + 1, *nbreaks + 1);
245 	G_realloc(classbreaks, j * sizeof(double));
246 	for (i = 0; i < j; i++)
247 	    classbreaks[i] = 0;
248     }
249 
250     j = 0;
251     for (i = 0; i < *nbreaks; i++) {
252 	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
253 	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
254 	    classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
255 	    j++;
256 	}
257     }
258 
259     *nbreaks = j;
260 
261     G_free(lequi);
262     return (1);
263 }
264 
265 /* FIXME: there seems to a problem with array overflow, probably due to
266    the fact that the code was ported from fortran which has 1-based arrays*/
AS_class_discont(double * data,int count,int nbreaks,double * classbreaks)267 double AS_class_discont(double *data, int count, int nbreaks,
268 		     double *classbreaks)
269 {
270     int *num, nbclass;
271     double *no, *zz, *nz, *xn, *co;
272     double *x;		/* Vector standardized observations */
273     int i, j, k;
274     double min = 0, max = 0, rangemax = 0;
275     int n = 0;
276     double rangemin = 0, xlim = 0;
277     double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
278     int nf = 0, nmax = 0;
279     double *abc;
280     int nd = 0;
281     double den = 0, d = 0;
282     int im = 0, ji = 0;
283     int tmp = 0;
284     int nff = 0, jj = 0, no1 = 0, no2 = 0;
285     double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
286 
287 
288     /*get the number of values */
289     n = count;
290 
291     nbclass = nbreaks + 1;
292 
293     num = G_malloc((nbclass + 1) * sizeof(int));
294     no = G_malloc((nbclass + 1) * sizeof(double));
295     zz = G_malloc((nbclass + 1) * sizeof(double));
296     nz = G_malloc(3 * sizeof(double));
297     xn = G_malloc((n + 1) * sizeof(double));
298     co = G_malloc((nbclass + 1) * sizeof(double));
299 
300     /* We copy the array of values to x, in order to be able to standardize it */
301     x = G_malloc((n + 1) * sizeof(double));
302     x[0] = n;
303     xn[0] = 0;
304 
305     min = data[0];
306     max = data[count - 1];
307     for (i = 1; i <= n; i++)
308 	x[i] = data[i - 1];
309 
310     rangemax = max - min;
311     rangemin = rangemax;
312 
313     for (i = 2; i <= n; i++) {
314 	if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
315 	    rangemin = x[i] - x[i - 1];	/* rangemin = minimal distance */
316     }
317 
318     /* STANDARDIZATION
319      * and creation of the number vector (xn) */
320 
321     for (i = 1; i <= n; i++) {
322 	x[i] = (x[i] - min) / rangemax;
323 	xn[i] = i / (double)n;
324     }
325     xlim = rangemin / rangemax;
326     rangemin = rangemin / 2.0;
327 
328     /* Searching for the limits */
329     num[1] = n;
330     abc = G_malloc(3 * sizeof(double));
331 
332     /*     Loop through possible solutions */
333     for (i = 1; i <= nbclass; i++) {
334 	nmax = 0;
335 	dmax = 0.0;
336 	d2 = 0.0;
337 	nf = 0;			/*End number */
338 
339 	/*           Loop through classes */
340 	for (j = 1; j <= i; j++) {
341 	    nd = nf;		/*Start number */
342 	    nf = num[j];
343 	    co[j] = 10e37;
344 	    AS_eqdrt(x, xn, nd, nf, abc);
345 	    den = sqrt(pow(abc[1], 2) + 1.0);
346 	    nd++;
347 	    /*              Loop through observations */
348 	    for (k = nd; k <= nf; k++) {
349 		if (abc[2] == 0.0)
350 		    d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
351 		else
352 		    d = fabs(x[k] - abc[2]);
353 		d2 += pow(d, 2);
354 		if (x[k] - x[nd] < xlim)
355 		    continue;
356 		if (x[nf] - x[k] < xlim)
357 		    continue;
358 		if (d <= dmax)
359 		    continue;
360 		dmax = d;
361 		nmax = k;
362 	    }
363 	    nd--;		/* A VERIFIER! */
364 	    if (x[nf] != x[nd]) {
365 		if (nd != 0)
366 		    co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
367 		else
368 		    co[j] = (xn[nf]) / (x[nf]);	/* A VERIFIER! */
369 	    }
370 	}
371 	if (i == 1)
372 	    dd = d2;
373 	p = d2 / dd;
374 	for (j = 1; j <= i; j++) {
375 	    no[j] = num[j];
376 	    zz[j] = x[num[j]] * rangemax + min;
377 	    if (j == i)
378 		continue;
379 	    if (co[j] > co[j + 1]) {
380 		zz[j] = zz[j] + rangemin;
381 		continue;
382 	    }
383 	    zz[j] = zz[j] - rangemin;
384 	    no[j] = no[j] - 1;
385 	}
386 	im = i - 1;
387 	if (im != 0.0) {
388 	    for (j = 1; j <= im; j++) {
389 		ji = i + 1 - j;
390 		no[ji] -= no[ji - 1];
391 	    }
392 	}
393 	if (nmax == 0) {
394 	    break;
395 	}
396 	nff = i + 2;
397 	tmp = 0;
398 	for (j = 1; j <= i; j++) {
399 	    jj = nff - j;
400 	    if (num[jj - 1] < nmax) {
401 		num[jj] = nmax;
402 		tmp = 1;
403 		break;
404 	    }
405 	    num[jj] = num[jj - 1];
406 	}
407 	if (tmp == 0) {
408 	    num[1] = nmax;
409 	    jj = 1;
410 	}
411 	if (jj == 1) {
412 	    xnj_1 = 0;
413 	    xj_1 = 0;
414 	}
415 	else {
416 	    xnj_1 = xn[num[jj - 1]];
417 	    xj_1 = x[num[jj - 1]];
418 	}
419 	no1 = (xn[num[jj]] - xnj_1) * n;
420 	no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
421 	f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
422 	f *= n;
423 	xt1 = (x[num[jj]] - xj_1) * f;
424 	xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
425 	if (xt2 == 0) {
426 	    xt2 = rangemin / 2.0 / rangemax * f;
427 	    xt1 -= xt2;
428 	}
429 	else if (xt1 * xt2 == 0) {
430 	    xt1 = rangemin / 2.0 / rangemax * f;
431 	    xt2 -= xt1;
432 	}
433 
434 	/* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
435 	if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
436 	    chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
437 
438     }
439 
440     /*  Fill up classbreaks of i <=nbclass classes */
441     for (j = 0; j <= (i - 1); j++)
442 	classbreaks[j] = zz[j + 1];
443 
444     return (chi2);
445 }
446 
AS_class_frequencies(double * data,int count,int nbreaks,double * classbreaks,int * frequencies)447 int AS_class_frequencies(double *data, int count, int nbreaks,
448 		      double *classbreaks, int *frequencies)
449 {
450     int i, j;
451     double min, max;
452 
453     min = data[0];
454     max = data[count - 1];
455     /* count cases in all classes, except for last class */
456     i = 0;
457     for (j = 0; j < nbreaks; j++) {
458 	while (data[i] <= classbreaks[j]) {
459 	    frequencies[j]++;
460 	    i++;
461 	}
462     }
463 
464     /*Now count cases in last class */
465     for (i = i; i < count; i++) {
466 	frequencies[nbreaks]++;
467     }
468 
469     return (1);
470 }
471