1 /*
2 * r.in.xyz
3 *
4 * Calculates univariate statistics from the non-null cells of a GRASS raster map
5 *
6 * Copyright 2006-2014 by M. Hamish Bowman, and The GRASS Development Team
7 * Author: M. Hamish Bowman, University of Otago, Dunedin, New Zealand
8 *
9 * Extended 2007 by Volker Wichmann to support the aggregate functions
10 * median, percentile, skewness and trimmed mean
11 *
12 * This program is free software licensed under the GPL (>=v2).
13 * Read the COPYING file that comes with GRASS for details.
14 *
15 * This program is intended as a replacement for the GRASS 5 s.cellstats module.
16 */
17
18 #include <grass/config.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23 #include <sys/types.h>
24 #include <grass/gis.h>
25 #include <grass/raster.h>
26 #include <grass/glocale.h>
27 #include "local_proto.h"
28
29 struct node
30 {
31 int next;
32 double z;
33 };
34
35 #define SIZE_INCREMENT 10
36 int num_nodes = 0;
37 int max_nodes = 0;
38 struct node *nodes;
39
40
new_node(void)41 int new_node(void)
42 {
43 int n = num_nodes++;
44
45 if (num_nodes >= max_nodes) {
46 max_nodes += SIZE_INCREMENT;
47 nodes = G_realloc(nodes, (size_t)max_nodes * sizeof(struct node));
48 }
49
50 return n;
51 }
52
53
54 /* add node to sorted, single linked list
55 * returns id if head has to be saved to index array, otherwise -1 */
add_node(int head,double z)56 int add_node(int head, double z)
57 {
58 int node_id, last_id, newnode_id, head_id;
59
60 head_id = head;
61 node_id = head_id;
62 last_id = head_id;
63
64 while (node_id != -1 && nodes[node_id].z < z) {
65 last_id = node_id;
66 node_id = nodes[node_id].next;
67 }
68
69 /* end of list, simply append */
70 if (node_id == -1) {
71 newnode_id = new_node();
72 nodes[newnode_id].next = -1;
73 nodes[newnode_id].z = z;
74 nodes[last_id].next = newnode_id;
75 return -1;
76 }
77 else if (node_id == head_id) { /* pole position, insert as head */
78 newnode_id = new_node();
79 nodes[newnode_id].next = head_id;
80 head_id = newnode_id;
81 nodes[newnode_id].z = z;
82 return (head_id);
83 }
84 else { /* somewhere in the middle, insert */
85 newnode_id = new_node();
86 nodes[newnode_id].z = z;
87 nodes[newnode_id].next = node_id;
88 nodes[last_id].next = newnode_id;
89 return -1;
90 }
91 }
92
93
94
main(int argc,char * argv[])95 int main(int argc, char *argv[])
96 {
97
98 FILE *in_fp;
99 int out_fd;
100 char *infile, *outmap;
101 int xcol, ycol, zcol, vcol, max_col, percent, skip_lines;
102 int method = -1;
103 int bin_n, bin_min, bin_max, bin_sum, bin_sumsq, bin_index;
104 double zrange_min, zrange_max, vrange_min, vrange_max, d_tmp;
105 char *fs; /* field delim */
106 off_t filesize;
107 int linesize;
108 unsigned long estimated_lines, line;
109 int from_stdin;
110 int can_seek;
111
112 RASTER_MAP_TYPE rtype;
113 struct History history;
114 char title[64];
115 void *n_array, *min_array, *max_array, *sum_array, *sumsq_array,
116 *index_array;
117 void *raster_row, *ptr;
118 struct Cell_head region;
119 int rows, last_rows, row0, cols; /* scan box size */
120 int row, col; /* counters */
121
122 int pass, npasses;
123 char buff[BUFFSIZE];
124 double x, y, z;
125 char **tokens;
126 int ntokens; /* number of tokens */
127 int arr_row, arr_col;
128 unsigned long count, count_total;
129
130 double min = 0.0 / 0.0; /* init as nan */
131 double max = 0.0 / 0.0; /* init as nan */
132 double zscale = 1.0;
133 double vscale = 1.0;
134 size_t offset, n_offset;
135 int n = 0;
136 double sum = 0.;
137 double sumsq = 0.;
138 double variance, mean, skew, sumdev;
139 int pth = 0;
140 double trim = 0.0;
141
142 int j, k;
143 int head_id, node_id;
144 int r_low, r_up;
145
146 struct GModule *module;
147 struct Option *input_opt, *output_opt, *delim_opt, *percent_opt,
148 *type_opt;
149 struct Option *method_opt, *xcol_opt, *ycol_opt, *zcol_opt, *zrange_opt,
150 *zscale_opt, *vcol_opt, *vrange_opt, *vscale_opt, *skip_opt;
151 struct Option *trim_opt, *pth_opt;
152 struct Flag *scan_flag, *shell_style, *skipline;
153
154
155 G_gisinit(argv[0]);
156
157 module = G_define_module();
158 G_add_keyword(_("raster"));
159 G_add_keyword(_("import"));
160 G_add_keyword(_("statistics"));
161 G_add_keyword(_("conversion"));
162 G_add_keyword(_("aggregation"));
163 G_add_keyword(_("binning"));
164 G_add_keyword("ASCII");
165 G_add_keyword(_("LIDAR"));
166 module->description =
167 _("Creates a raster map from an assemblage of many coordinates using univariate statistics.");
168
169 input_opt = G_define_standard_option(G_OPT_F_BIN_INPUT);
170 input_opt->description =
171 _("ASCII file containing input data (or \"-\" to read from stdin)");
172
173 output_opt = G_define_standard_option(G_OPT_R_OUTPUT);
174
175 method_opt = G_define_option();
176 method_opt->key = "method";
177 method_opt->type = TYPE_STRING;
178 method_opt->required = NO;
179 method_opt->description = _("Statistic to use for raster values");
180 method_opt->options =
181 "n,min,max,range,sum,mean,stddev,variance,coeff_var,median,percentile,skewness,trimmean";
182 method_opt->answer = "mean";
183 method_opt->guisection = _("Statistic");
184 G_asprintf((char **)&(method_opt->descriptions),
185 "n;%s;"
186 "min;%s;"
187 "max;%s;"
188 "range;%s;"
189 "sum;%s;"
190 "mean;%s;"
191 "stddev;%s;"
192 "variance;%s;"
193 "coeff_var;%s;"
194 "median;%s;"
195 "percentile;%s;"
196 "skewness;%s;"
197 "trimmean;%s",
198 _("Number of points in cell"),
199 _("Minimum value of point values in cell"),
200 _("Maximum value of point values in cell"),
201 _("Range of point values in cell"),
202 _("Sum of point values in cell"),
203 _("Mean (average) value of point values in cell"),
204 _("Standard deviation of point values in cell"),
205 _("Variance of point values in cell"),
206 _("Coefficient of variance of point values in cell"),
207 _("Median value of point values in cell"),
208 _("Pth (nth) percentile of point values in cell"),
209 _("Skewness of point values in cell"),
210 _("Trimmed mean of point values in cell"));
211
212 delim_opt = G_define_standard_option(G_OPT_F_SEP);
213 delim_opt->guisection = _("Input");
214
215 xcol_opt = G_define_option();
216 xcol_opt->key = "x";
217 xcol_opt->type = TYPE_INTEGER;
218 xcol_opt->required = NO;
219 xcol_opt->answer = "1";
220 xcol_opt->description =
221 _("Column number of x coordinates in input file (first column is 1)");
222 xcol_opt->guisection = _("Input");
223
224 ycol_opt = G_define_option();
225 ycol_opt->key = "y";
226 ycol_opt->type = TYPE_INTEGER;
227 ycol_opt->required = NO;
228 ycol_opt->answer = "2";
229 ycol_opt->description = _("Column number of y coordinates in input file");
230 ycol_opt->guisection = _("Input");
231
232 zcol_opt = G_define_option();
233 zcol_opt->key = "z";
234 zcol_opt->type = TYPE_INTEGER;
235 zcol_opt->required = NO;
236 zcol_opt->answer = "3";
237 zcol_opt->label = _("Column number of data values in input file");
238 zcol_opt->description =
239 _("If a separate value column is given, this option refers to the "
240 "z-coordinate column to be filtered by the zrange option");
241 zcol_opt->guisection = _("Input");
242
243 skip_opt = G_define_option();
244 skip_opt->key = "skip";
245 skip_opt->type = TYPE_INTEGER;
246 skip_opt->required = NO;
247 skip_opt->multiple = NO;
248 skip_opt->answer = "0";
249 skip_opt->description =
250 _("Number of header lines to skip at top of input file");
251 skip_opt->guisection = _("Input");
252
253 zrange_opt = G_define_option();
254 zrange_opt->key = "zrange";
255 zrange_opt->type = TYPE_DOUBLE;
256 zrange_opt->required = NO;
257 zrange_opt->key_desc = "min,max";
258 zrange_opt->description = _("Filter range for z data (min,max)");
259 zrange_opt->guisection = _("Advanced Input");
260
261 zscale_opt = G_define_option();
262 zscale_opt->key = "zscale";
263 zscale_opt->type = TYPE_DOUBLE;
264 zscale_opt->required = NO;
265 zscale_opt->answer = "1.0";
266 zscale_opt->description = _("Scale to apply to z data");
267 zscale_opt->guisection = _("Advanced Input");
268
269 vcol_opt = G_define_option();
270 vcol_opt->key = "value_column";
271 vcol_opt->type = TYPE_INTEGER;
272 vcol_opt->required = NO;
273 vcol_opt->answer = "0";
274 vcol_opt->label = _("Alternate column number of data values in input file");
275 vcol_opt->description = _("If not given (or set to 0) the z-column data is used");
276 vcol_opt->guisection = _("Advanced Input");
277
278 vrange_opt = G_define_option();
279 vrange_opt->key = "vrange";
280 vrange_opt->type = TYPE_DOUBLE;
281 vrange_opt->required = NO;
282 vrange_opt->key_desc = "min,max";
283 vrange_opt->description = _("Filter range for alternate value column data (min,max)");
284 vrange_opt->guisection = _("Advanced Input");
285
286 vscale_opt = G_define_option();
287 vscale_opt->key = "vscale";
288 vscale_opt->type = TYPE_DOUBLE;
289 vscale_opt->required = NO;
290 vscale_opt->answer = "1.0";
291 vscale_opt->description = _("Scale to apply to alternate value column data");
292 vscale_opt->guisection = _("Advanced Input");
293
294 type_opt = G_define_standard_option(G_OPT_R_TYPE);
295 type_opt->required = NO;
296 type_opt->answer = "FCELL";
297 type_opt->guisection = _("Output");
298
299 percent_opt = G_define_option();
300 percent_opt->key = "percent";
301 percent_opt->type = TYPE_INTEGER;
302 percent_opt->required = NO;
303 percent_opt->answer = "100";
304 percent_opt->options = "1-100";
305 percent_opt->description = _("Percent of map to keep in memory");
306
307 /* I would prefer to call the following "percentile", but that has too
308 * much namespace overlap with the "percent" option above */
309 pth_opt = G_define_option();
310 pth_opt->key = "pth";
311 pth_opt->type = TYPE_INTEGER;
312 pth_opt->required = NO;
313 pth_opt->options = "1-100";
314 pth_opt->description = _("Pth percentile of the values");
315 pth_opt->guisection = _("Statistic");
316
317 trim_opt = G_define_option();
318 trim_opt->key = "trim";
319 trim_opt->type = TYPE_DOUBLE;
320 trim_opt->required = NO;
321 trim_opt->options = "0-50";
322 trim_opt->description =
323 _("Discard <trim> percent of the smallest and <trim> percent of the largest observations");
324 trim_opt->guisection = _("Statistic");
325
326 scan_flag = G_define_flag();
327 scan_flag->key = 's';
328 scan_flag->description = _("Scan data file for extent then exit");
329 scan_flag->guisection = _("Scan");
330 scan_flag->suppress_required = YES;
331
332 shell_style = G_define_flag();
333 shell_style->key = 'g';
334 shell_style->description =
335 _("In scan mode, print using shell script style");
336 shell_style->guisection = _("Scan");
337 shell_style->suppress_required = YES;
338
339 skipline = G_define_flag();
340 skipline->key = 'i';
341 skipline->description = _("Ignore broken lines");
342
343 G_option_required(output_opt, scan_flag, shell_style, NULL);
344 G_option_requires(scan_flag, input_opt, NULL);
345 G_option_requires(shell_style, input_opt, NULL);
346
347 if (G_parser(argc, argv))
348 exit(EXIT_FAILURE);
349
350
351 /* parse input values */
352 infile = input_opt->answer;
353 outmap = output_opt->answer;
354
355 if (shell_style->answer && !scan_flag->answer) {
356 scan_flag->answer = 1; /* pointer not int, so set = shell_style->answer ? */
357 }
358
359 fs = G_option_to_separator(delim_opt);
360
361 xcol = atoi(xcol_opt->answer);
362 ycol = atoi(ycol_opt->answer);
363 zcol = atoi(zcol_opt->answer);
364 vcol = atoi(vcol_opt->answer);
365 if ((xcol < 0) || (ycol < 0) || (zcol < 0) || (vcol < 0))
366 G_fatal_error(_("Please specify a reasonable column number."));
367 max_col = (xcol > ycol) ? xcol : ycol;
368 max_col = (zcol > max_col) ? zcol : max_col;
369 if(vcol)
370 max_col = (vcol > max_col) ? vcol : max_col;
371
372 percent = atoi(percent_opt->answer);
373 zscale = atof(zscale_opt->answer);
374 vscale = atof(vscale_opt->answer);
375
376 skip_lines = atoi(skip_opt->answer);
377 if (skip_lines < 0)
378 G_fatal_error(_("Please specify reasonable number of lines to skip"));
379
380 /* parse zrange and vrange */
381 if (zrange_opt->answer != NULL) {
382 if (zrange_opt->answers[0] == NULL)
383 G_fatal_error(_("Invalid zrange"));
384
385 sscanf(zrange_opt->answers[0], "%lf", &zrange_min);
386 sscanf(zrange_opt->answers[1], "%lf", &zrange_max);
387
388 if (zrange_min > zrange_max) {
389 d_tmp = zrange_max;
390 zrange_max = zrange_min;
391 zrange_min = d_tmp;
392 }
393 }
394
395 if (vrange_opt->answer != NULL) {
396 if (vrange_opt->answers[0] == NULL)
397 G_fatal_error(_("Invalid vrange"));
398
399 sscanf(vrange_opt->answers[0], "%lf", &vrange_min);
400 sscanf(vrange_opt->answers[1], "%lf", &vrange_max);
401
402 if (vrange_min > vrange_max) {
403 d_tmp = vrange_max;
404 vrange_max = vrange_min;
405 vrange_min = d_tmp;
406 }
407 }
408
409 /* figure out what maps we need in memory */
410 /* n n
411 min min
412 max max
413 range min max max - min
414 sum sum
415 mean sum n sum/n
416 stddev sum sumsq n sqrt((sumsq - sum*sum/n)/n)
417 variance sum sumsq n (sumsq - sum*sum/n)/n
418 coeff_var sum sumsq n sqrt((sumsq - sum*sum/n)/n) / (sum/n)
419 median n array index to linked list
420 percentile n array index to linked list
421 skewness n array index to linked list
422 trimmean n array index to linked list
423 */
424 bin_n = FALSE;
425 bin_min = FALSE;
426 bin_max = FALSE;
427 bin_sum = FALSE;
428 bin_sumsq = FALSE;
429 bin_index = FALSE;
430
431 if (strcmp(method_opt->answer, "n") == 0) {
432 method = METHOD_N;
433 bin_n = TRUE;
434 }
435 if (strcmp(method_opt->answer, "min") == 0) {
436 method = METHOD_MIN;
437 bin_min = TRUE;
438 }
439 if (strcmp(method_opt->answer, "max") == 0) {
440 method = METHOD_MAX;
441 bin_max = TRUE;
442 }
443 if (strcmp(method_opt->answer, "range") == 0) {
444 method = METHOD_RANGE;
445 bin_min = TRUE;
446 bin_max = TRUE;
447 }
448 if (strcmp(method_opt->answer, "sum") == 0) {
449 method = METHOD_SUM;
450 bin_sum = TRUE;
451 }
452 if (strcmp(method_opt->answer, "mean") == 0) {
453 method = METHOD_MEAN;
454 bin_sum = TRUE;
455 bin_n = TRUE;
456 }
457 if (strcmp(method_opt->answer, "stddev") == 0) {
458 method = METHOD_STDDEV;
459 bin_sum = TRUE;
460 bin_sumsq = TRUE;
461 bin_n = TRUE;
462 }
463 if (strcmp(method_opt->answer, "variance") == 0) {
464 method = METHOD_VARIANCE;
465 bin_sum = TRUE;
466 bin_sumsq = TRUE;
467 bin_n = TRUE;
468 }
469 if (strcmp(method_opt->answer, "coeff_var") == 0) {
470 method = METHOD_COEFF_VAR;
471 bin_sum = TRUE;
472 bin_sumsq = TRUE;
473 bin_n = TRUE;
474 }
475 if (strcmp(method_opt->answer, "median") == 0) {
476 method = METHOD_MEDIAN;
477 bin_index = TRUE;
478 }
479 if (strcmp(method_opt->answer, "percentile") == 0) {
480 if (pth_opt->answer != NULL)
481 pth = atoi(pth_opt->answer);
482 else
483 G_fatal_error(_("Unable to calculate percentile without the pth option specified!"));
484 method = METHOD_PERCENTILE;
485 bin_index = TRUE;
486 }
487 if (strcmp(method_opt->answer, "skewness") == 0) {
488 method = METHOD_SKEWNESS;
489 bin_index = TRUE;
490 }
491 if (strcmp(method_opt->answer, "trimmean") == 0) {
492 if (trim_opt->answer != NULL)
493 trim = atof(trim_opt->answer) / 100.0;
494 else
495 G_fatal_error(_("Unable to calculate trimmed mean without the trim option specified!"));
496 method = METHOD_TRIMMEAN;
497 bin_index = TRUE;
498 }
499
500 if (strcmp("CELL", type_opt->answer) == 0)
501 rtype = CELL_TYPE;
502 else if (strcmp("DCELL", type_opt->answer) == 0)
503 rtype = DCELL_TYPE;
504 else
505 rtype = FCELL_TYPE;
506
507 if (method == METHOD_N)
508 rtype = CELL_TYPE;
509
510
511 G_get_window(®ion);
512 rows = last_rows = region.rows;
513 npasses = 1;
514 if (percent < 100) {
515 rows = (int)(region.rows * (percent / 100.0));
516 npasses = region.rows / rows;
517 last_rows = region.rows - npasses * rows;
518 if (last_rows)
519 npasses++;
520 else
521 last_rows = rows;
522 }
523 cols = region.cols;
524
525 G_debug(2, "region.n=%f region.s=%f region.ns_res=%f", region.north,
526 region.south, region.ns_res);
527 G_debug(2, "region.rows=%d [box_rows=%d] region.cols=%d", region.rows,
528 rows, region.cols);
529
530 if (!scan_flag->answer) {
531 /* check if rows * (cols + 1) go into a size_t */
532 if (sizeof(size_t) < 8) {
533 double dsize = rows * (cols + 1);
534
535 if (dsize != (size_t)rows * (cols + 1))
536 G_fatal_error(_("Unable to process the whole map at once. "
537 "Please set the %s option to some value lower than 100."),
538 percent_opt->key);
539 }
540 /* allocate memory (test for enough before we start) */
541 if (bin_n)
542 n_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(CELL_TYPE));
543 if (bin_min)
544 min_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
545 if (bin_max)
546 max_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
547 if (bin_sum)
548 sum_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
549 if (bin_sumsq)
550 sumsq_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
551 if (bin_index)
552 index_array =
553 G_calloc((size_t)rows * (cols + 1), Rast_cell_size(CELL_TYPE));
554
555 /* and then free it again */
556 if (bin_n)
557 G_free(n_array);
558 if (bin_min)
559 G_free(min_array);
560 if (bin_max)
561 G_free(max_array);
562 if (bin_sum)
563 G_free(sum_array);
564 if (bin_sumsq)
565 G_free(sumsq_array);
566 if (bin_index)
567 G_free(index_array);
568
569 /** end memory test **/
570 }
571
572
573 /* open input file */
574 if (strcmp("-", infile) == 0) {
575 from_stdin = TRUE;
576 in_fp = stdin;
577 infile = G_store("stdin"); /* filename for history metadata */
578 }
579 else {
580 from_stdin = FALSE;
581 if ((in_fp = fopen(infile, "r")) == NULL)
582 G_fatal_error(_("Unable to open input file <%s>"), infile);
583 }
584
585 can_seek = fseek(in_fp, 0L, SEEK_SET) == 0;
586
587 /* can't rewind() non-files */
588 if (!can_seek && npasses != 1) {
589 G_warning(_("If input is not from a file it is only possible to perform a single pass."));
590 npasses = 1;
591 }
592
593 /* skip past header lines */
594 for (line = 0; line < (unsigned long)skip_lines; line++) {
595 if (0 == G_getl2(buff, BUFFSIZE - 1, in_fp))
596 break;
597 }
598
599 if (scan_flag->answer) {
600 if (zrange_opt->answer || vrange_opt->answer)
601 G_warning(_("Range filters will not be taken into account during scan"));
602
603 scan_bounds(in_fp, xcol, ycol, zcol, vcol, fs, shell_style->answer,
604 skipline->answer, zscale, vscale);
605
606 /* close input file */
607 if (!from_stdin)
608 fclose(in_fp);
609
610 exit(EXIT_SUCCESS);
611 }
612
613
614 /* open output map */
615 out_fd = Rast_open_new(outmap, rtype);
616
617 if (can_seek) {
618 /* guess at number of lines in the file without actually reading it all in */
619 for (line = 0; line < 10; line++) { /* arbitrarily use 10th line for guess */
620 if (0 == G_getl2(buff, BUFFSIZE - 1, in_fp))
621 break;
622 linesize = strlen(buff) + 1;
623 }
624 G_fseek(in_fp, 0L, SEEK_END);
625 filesize = G_ftell(in_fp);
626 rewind(in_fp);
627 if (linesize < 6) /* min possible: "0,0,0\n" */
628 linesize = 6;
629 estimated_lines = filesize / linesize;
630 G_debug(2, "estimated number of lines in file: %lu", estimated_lines);
631 }
632 else
633 estimated_lines = -1;
634
635 /* allocate memory for a single row of output data */
636 raster_row = Rast_allocate_buf(rtype);
637
638 G_message(_("Reading input data..."));
639
640 count_total = 0;
641
642 /* main binning loop(s) */
643 for (pass = 1; pass <= npasses; pass++) {
644 if (npasses > 1)
645 G_message(_("Pass #%d (of %d) ..."), pass, npasses);
646
647 if (can_seek) {
648 rewind(in_fp);
649
650 /* skip past header lines again */
651 for (line = 0; line < (unsigned long)skip_lines; line++) {
652 if (0 == G_getl2(buff, BUFFSIZE - 1, in_fp))
653 break;
654 }
655 }
656
657 /* figure out segmentation */
658 row0 = (pass - 1) * rows;
659 if (pass == npasses) {
660 rows = last_rows;
661 }
662
663 G_debug(2, "pass=%d/%d rows=%d", pass, npasses, rows);
664
665 if (bin_n) {
666 G_debug(2, "allocating n_array");
667 n_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(CELL_TYPE));
668 blank_array(n_array, rows, cols, CELL_TYPE, 0);
669 }
670 if (bin_min) {
671 G_debug(2, "allocating min_array");
672 min_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
673 blank_array(min_array, rows, cols, rtype, -1); /* fill with NULLs */
674 }
675 if (bin_max) {
676 G_debug(2, "allocating max_array");
677 max_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
678 blank_array(max_array, rows, cols, rtype, -1); /* fill with NULLs */
679 }
680 if (bin_sum) {
681 G_debug(2, "allocating sum_array");
682 sum_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
683 blank_array(sum_array, rows, cols, rtype, 0);
684 }
685 if (bin_sumsq) {
686 G_debug(2, "allocating sumsq_array");
687 sumsq_array = G_calloc((size_t)rows * (cols + 1), Rast_cell_size(rtype));
688 blank_array(sumsq_array, rows, cols, rtype, 0);
689 }
690 if (bin_index) {
691 G_debug(2, "allocating index_array");
692 index_array =
693 G_calloc((size_t)rows * (cols + 1), Rast_cell_size(CELL_TYPE));
694 blank_array(index_array, rows, cols, CELL_TYPE, -1); /* fill with NULLs */
695 }
696
697 line = 0;
698 count = 0;
699 G_percent_reset();
700
701 while (0 != G_getl2(buff, BUFFSIZE - 1, in_fp)) {
702 line++;
703
704 if (line % 100000 == 0) { /* mod for speed */
705 if (!can_seek)
706 G_clicker();
707 else if (line < estimated_lines)
708 G_percent(line, estimated_lines, 3);
709 }
710
711 if ((buff[0] == '#') || (buff[0] == '\0')) {
712 continue; /* line is a comment or blank */
713 }
714
715 G_chop(buff); /* remove leading and trailing whitespace from the string. unneded?? */
716 tokens = G_tokenize(buff, fs);
717 ntokens = G_number_of_tokens(tokens);
718
719 if ((ntokens < 3) || (max_col > ntokens)) {
720 if (skipline->answer) {
721 G_warning(_("Not enough data columns. "
722 "Incorrect delimiter or column number? "
723 "Found the following character(s) in row %lu:\n[%s]"),
724 line, buff);
725 G_warning(_("Line ignored as requested"));
726 continue; /* line is garbage */
727 }
728 else {
729 G_fatal_error(_("Not enough data columns. "
730 "Incorrect delimiter or column number? "
731 "Found the following character(s) in row %lu:\n[%s]"),
732 line, buff);
733 }
734 }
735
736 /* too slow?
737 if ( G_projection() == PROJECTION_LL ) {
738 G_scan_easting( tokens[xcol-1], &x, region.proj);
739 G_scan_northing( tokens[ycol-1], &y, region.proj);
740 }
741 else {
742 */
743 if (1 != sscanf(tokens[ycol - 1], "%lf", &y))
744 G_fatal_error(_("Bad y-coordinate line %lu column %d. <%s>"),
745 line, ycol, tokens[ycol - 1]);
746 if (y <= region.south || y > region.north) {
747 G_free_tokens(tokens);
748 continue;
749 }
750 if (1 != sscanf(tokens[xcol - 1], "%lf", &x))
751 G_fatal_error(_("Bad x-coordinate line %lu column %d. <%s>"),
752 line, xcol, tokens[xcol - 1]);
753 if (x < region.west || x >= region.east) {
754 G_free_tokens(tokens);
755 continue;
756 }
757
758 /* find the bin in the current array box */
759 arr_row = (int)((region.north - y) / region.ns_res) - row0;
760 if (arr_row < 0 || arr_row >= rows) {
761 G_free_tokens(tokens);
762 continue;
763 }
764 arr_col = (int)((x - region.west) / region.ew_res);
765
766 /* G_debug(5, "arr_row: %d arr_col: %d", arr_row, arr_col); */
767
768 if (1 != sscanf(tokens[zcol - 1], "%lf", &z))
769 G_fatal_error(_("Bad z-coordinate line %lu column %d. <%s>"),
770 line, zcol, tokens[zcol - 1]);
771 z = z * zscale;
772
773 if (zrange_opt->answer) {
774 if (z < zrange_min || z > zrange_max) {
775 G_free_tokens(tokens);
776 continue;
777 }
778 }
779
780 if(vcol) {
781 if (1 != sscanf(tokens[vcol - 1], "%lf", &z))
782 G_fatal_error(_("Bad data value line %lu column %d. <%s>"),
783 line, vcol, tokens[vcol - 1]);
784 /* we're past the zrange check, so pass over control of the variable */
785 z = z * vscale;
786
787 if (vrange_opt->answer) {
788 if (z < vrange_min || z > vrange_max) {
789 G_free_tokens(tokens);
790 continue;
791 }
792 }
793 }
794
795 count++;
796 /* G_debug(5, "x: %f, y: %f, z: %f", x, y, z); */
797 G_free_tokens(tokens);
798
799 if (bin_n)
800 update_n(n_array, cols, arr_row, arr_col);
801 if (bin_min)
802 update_min(min_array, cols, arr_row, arr_col, rtype, z);
803 if (bin_max)
804 update_max(max_array, cols, arr_row, arr_col, rtype, z);
805 if (bin_sum)
806 update_sum(sum_array, cols, arr_row, arr_col, rtype, z);
807 if (bin_sumsq)
808 update_sumsq(sumsq_array, cols, arr_row, arr_col, rtype, z);
809 if (bin_index) {
810 ptr = index_array;
811 ptr =
812 G_incr_void_ptr(ptr,
813 ((arr_row * cols) +
814 arr_col) * Rast_cell_size(CELL_TYPE));
815
816 if (Rast_is_null_value(ptr, CELL_TYPE)) { /* first node */
817 head_id = new_node();
818 nodes[head_id].next = -1;
819 nodes[head_id].z = z;
820 Rast_set_c_value(ptr, head_id, CELL_TYPE); /* store index to head */
821 }
822 else { /* head is already there */
823
824 head_id = Rast_get_c_value(ptr, CELL_TYPE); /* get index to head */
825 head_id = add_node(head_id, z);
826 if (head_id != -1)
827 Rast_set_c_value(ptr, head_id, CELL_TYPE); /* store index to head */
828 }
829 }
830 } /* while !EOF */
831
832 G_percent(1, 1, 1); /* flush */
833 G_debug(2, "pass %d finished, %lu coordinates in box", pass, count);
834 count_total += count;
835 G_message(_("%lu points found in input file"), line);
836
837 /* calc stats and output */
838 G_message(_("Writing to output raster map..."));
839 for (row = 0; row < rows; row++) {
840
841 switch (method) {
842 case METHOD_N: /* n is a straight copy */
843 Rast_raster_cpy(raster_row,
844 n_array +
845 (row * cols * Rast_cell_size(CELL_TYPE)), cols,
846 CELL_TYPE);
847 break;
848
849 case METHOD_MIN:
850 Rast_raster_cpy(raster_row,
851 min_array + (row * cols * Rast_cell_size(rtype)),
852 cols, rtype);
853 break;
854
855 case METHOD_MAX:
856 Rast_raster_cpy(raster_row,
857 max_array + (row * cols * Rast_cell_size(rtype)),
858 cols, rtype);
859 break;
860
861 case METHOD_SUM:
862 Rast_raster_cpy(raster_row,
863 sum_array + (row * cols * Rast_cell_size(rtype)),
864 cols, rtype);
865 break;
866
867 case METHOD_RANGE: /* (max-min) */
868 ptr = raster_row;
869 for (col = 0; col < cols; col++) {
870 offset = (row * cols + col) * Rast_cell_size(rtype);
871 min = Rast_get_d_value(min_array + offset, rtype);
872 max = Rast_get_d_value(max_array + offset, rtype);
873 Rast_set_d_value(ptr, max - min, rtype);
874 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
875 }
876 break;
877
878 case METHOD_MEAN: /* (sum / n) */
879 ptr = raster_row;
880 for (col = 0; col < cols; col++) {
881 offset = (row * cols + col) * Rast_cell_size(rtype);
882 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
883 n = Rast_get_c_value(n_array + n_offset, CELL_TYPE);
884 sum = Rast_get_d_value(sum_array + offset, rtype);
885
886 if (n == 0)
887 Rast_set_null_value(ptr, 1, rtype);
888 else
889 Rast_set_d_value(ptr, (sum / n), rtype);
890
891 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
892 }
893 break;
894
895 case METHOD_STDDEV: /* sqrt(variance) */
896 case METHOD_VARIANCE: /* (sumsq - sum*sum/n)/n */
897 case METHOD_COEFF_VAR: /* 100 * stdev / mean */
898 ptr = raster_row;
899 for (col = 0; col < cols; col++) {
900 offset = (row * cols + col) * Rast_cell_size(rtype);
901 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
902 n = Rast_get_c_value(n_array + n_offset, CELL_TYPE);
903 sum = Rast_get_d_value(sum_array + offset, rtype);
904 sumsq = Rast_get_d_value(sumsq_array + offset, rtype);
905
906 if (n == 0)
907 Rast_set_null_value(ptr, 1, rtype);
908 else {
909 variance = (sumsq - sum * sum / n) / n;
910 if (variance < GRASS_EPSILON)
911 variance = 0.0;
912
913 if (method == METHOD_STDDEV)
914 Rast_set_d_value(ptr, sqrt(variance), rtype);
915
916 else if (method == METHOD_VARIANCE)
917 Rast_set_d_value(ptr, variance, rtype);
918
919 else if (method == METHOD_COEFF_VAR)
920 Rast_set_d_value(ptr,
921 100 * sqrt(variance) / (sum /
922 n),
923 rtype);
924
925 }
926 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
927 }
928 break;
929 case METHOD_MEDIAN: /* median, if only one point in cell we will use that */
930 ptr = raster_row;
931 for (col = 0; col < cols; col++) {
932 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
933 if (Rast_is_null_value(index_array + n_offset, CELL_TYPE)) /* no points in cell */
934 Rast_set_null_value(ptr, 1, rtype);
935 else { /* one or more points in cell */
936
937 head_id =
938 Rast_get_c_value(index_array + n_offset,
939 CELL_TYPE);
940 node_id = head_id;
941
942 n = 0;
943
944 while (node_id != -1) { /* count number of points in cell */
945 n++;
946 node_id = nodes[node_id].next;
947 }
948
949 if (n == 1) /* only one point, use that */
950 Rast_set_d_value(ptr, nodes[head_id].z,
951 rtype);
952 else if (n % 2 != 0) { /* odd number of points: median_i = (n + 1) / 2 */
953 n = (n + 1) / 2;
954 node_id = head_id;
955 for (j = 1; j < n; j++) /* get "median element" */
956 node_id = nodes[node_id].next;
957
958 Rast_set_d_value(ptr, nodes[node_id].z,
959 rtype);
960 }
961 else { /* even number of points: median = (val_below + val_above) / 2 */
962
963 z = (n + 1) / 2.0;
964 n = floor(z);
965 node_id = head_id;
966 for (j = 1; j < n; j++) /* get element "below" */
967 node_id = nodes[node_id].next;
968
969 z = (nodes[node_id].z +
970 nodes[nodes[node_id].next].z) / 2;
971 Rast_set_d_value(ptr, z, rtype);
972 }
973 }
974 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
975 }
976 break;
977 case METHOD_PERCENTILE: /* rank = (pth*(n+1))/100; interpolate linearly */
978 ptr = raster_row;
979 for (col = 0; col < cols; col++) {
980 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
981 if (Rast_is_null_value(index_array + n_offset, CELL_TYPE)) /* no points in cell */
982 Rast_set_null_value(ptr, 1, rtype);
983 else {
984 head_id =
985 Rast_get_c_value(index_array + n_offset,
986 CELL_TYPE);
987 node_id = head_id;
988 n = 0;
989
990 while (node_id != -1) { /* count number of points in cell */
991 n++;
992 node_id = nodes[node_id].next;
993 }
994
995 z = (pth * (n + 1)) / 100.0;
996 r_low = floor(z); /* lower rank */
997 if (r_low < 1)
998 r_low = 1;
999 else if (r_low > n)
1000 r_low = n;
1001
1002 r_up = ceil(z); /* upper rank */
1003 if (r_up > n)
1004 r_up = n;
1005
1006 node_id = head_id;
1007 for (j = 1; j < r_low; j++) /* search lower value */
1008 node_id = nodes[node_id].next;
1009
1010 z = nodes[node_id].z; /* save lower value */
1011 node_id = head_id;
1012 for (j = 1; j < r_up; j++) /* search upper value */
1013 node_id = nodes[node_id].next;
1014
1015 z = (z + nodes[node_id].z) / 2;
1016 Rast_set_d_value(ptr, z, rtype);
1017 }
1018 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
1019 }
1020 break;
1021 case METHOD_SKEWNESS: /* skewness = sum(xi-mean)^3/(N-1)*s^3 */
1022 ptr = raster_row;
1023 for (col = 0; col < cols; col++) {
1024 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
1025 if (Rast_is_null_value(index_array + n_offset, CELL_TYPE)) /* no points in cell */
1026 Rast_set_null_value(ptr, 1, rtype);
1027 else {
1028 head_id =
1029 Rast_get_c_value(index_array + n_offset,
1030 CELL_TYPE);
1031 node_id = head_id;
1032
1033 n = 0; /* count */
1034 sum = 0.0; /* sum */
1035 sumsq = 0.0; /* sum of squares */
1036 sumdev = 0.0; /* sum of (xi - mean)^3 */
1037 skew = 0.0; /* skewness */
1038
1039 while (node_id != -1) {
1040 z = nodes[node_id].z;
1041 n++;
1042 sum += z;
1043 sumsq += (z * z);
1044 node_id = nodes[node_id].next;
1045 }
1046
1047 if (n > 1) { /* if n == 1, skew is "0.0" */
1048 mean = sum / n;
1049 node_id = head_id;
1050 while (node_id != -1) {
1051 z = nodes[node_id].z;
1052 sumdev += pow((z - mean), 3);
1053 node_id = nodes[node_id].next;
1054 }
1055
1056 variance = (sumsq - sum * sum / n) / n;
1057 if (variance < GRASS_EPSILON)
1058 skew = 0.0;
1059 else
1060 skew =
1061 sumdev / ((n - 1) *
1062 pow(sqrt(variance), 3));
1063 }
1064 Rast_set_d_value(ptr, skew, rtype);
1065 }
1066 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
1067 }
1068 break;
1069 case METHOD_TRIMMEAN:
1070 ptr = raster_row;
1071 for (col = 0; col < cols; col++) {
1072 n_offset = (row * cols + col) * Rast_cell_size(CELL_TYPE);
1073 if (Rast_is_null_value(index_array + n_offset, CELL_TYPE)) /* no points in cell */
1074 Rast_set_null_value(ptr, 1, rtype);
1075 else {
1076 head_id =
1077 Rast_get_c_value(index_array + n_offset,
1078 CELL_TYPE);
1079
1080 node_id = head_id;
1081 n = 0;
1082 while (node_id != -1) { /* count number of points in cell */
1083 n++;
1084 node_id = nodes[node_id].next;
1085 }
1086
1087 if (1 == n)
1088 mean = nodes[head_id].z;
1089 else {
1090 k = floor(trim * n + 0.5); /* number of ranks to discard on each tail */
1091
1092 if (k > 0 && (n - 2 * k) > 0) { /* enough elements to discard */
1093 node_id = head_id;
1094 for (j = 0; j < k; j++) /* move to first rank to consider */
1095 node_id = nodes[node_id].next;
1096
1097 j = k + 1;
1098 k = n - k;
1099 n = 0;
1100 sum = 0.0;
1101
1102 while (j <= k) { /* get values in interval */
1103 n++;
1104 sum += nodes[node_id].z;
1105 node_id = nodes[node_id].next;
1106 j++;
1107 }
1108 }
1109 else {
1110 node_id = head_id;
1111 n = 0;
1112 sum = 0.0;
1113 while (node_id != -1) {
1114 n++;
1115 sum += nodes[node_id].z;
1116 node_id = nodes[node_id].next;
1117 }
1118 }
1119 mean = sum / n;
1120 }
1121 Rast_set_d_value(ptr, mean, rtype);
1122 }
1123 ptr = G_incr_void_ptr(ptr, Rast_cell_size(rtype));
1124 }
1125 break;
1126
1127 default:
1128 G_fatal_error("?");
1129 }
1130
1131 G_percent(row, rows, 5);
1132
1133 /* write out line of raster data */
1134 Rast_put_row(out_fd, raster_row, rtype);
1135 }
1136
1137 /* free memory */
1138 if (bin_n)
1139 G_free(n_array);
1140 if (bin_min)
1141 G_free(min_array);
1142 if (bin_max)
1143 G_free(max_array);
1144 if (bin_sum)
1145 G_free(sum_array);
1146 if (bin_sumsq)
1147 G_free(sumsq_array);
1148 if (bin_index) {
1149 G_free(index_array);
1150 G_free(nodes);
1151 num_nodes = 0;
1152 max_nodes = 0;
1153 nodes = NULL;
1154 }
1155
1156 } /* passes loop */
1157
1158 G_percent(1, 1, 1); /* flush */
1159 G_free(raster_row);
1160
1161 /* close input file */
1162 if (!from_stdin)
1163 fclose(in_fp);
1164
1165 /* close raster file & write history */
1166 Rast_close(out_fd);
1167
1168 sprintf(title, "Raw x,y,z data binned into a raster grid by cell %s",
1169 method_opt->answer);
1170 Rast_put_cell_title(outmap, title);
1171
1172 Rast_short_history(outmap, "raster", &history);
1173 Rast_command_history(&history);
1174 Rast_set_history(&history, HIST_DATSRC_1, infile);
1175 Rast_write_history(outmap, &history);
1176
1177 G_done_msg(_("%lu points found in region."), count_total);
1178
1179 exit(EXIT_SUCCESS);
1180 }
1181
1182
1183
scan_bounds(FILE * fp,int xcol,int ycol,int zcol,int vcol,char * fs,int shell_style,int skipline,double zscale,double vscale)1184 int scan_bounds(FILE * fp, int xcol, int ycol, int zcol, int vcol, char *fs,
1185 int shell_style, int skipline, double zscale, double vscale)
1186 {
1187 unsigned long line;
1188 int first, max_col;
1189 char buff[BUFFSIZE];
1190 double min_x, max_x, min_y, max_y, min_z, max_z, min_v, max_v;
1191 char **tokens;
1192 int ntokens; /* number of tokens */
1193 double x, y, z, v;
1194
1195 max_col = (xcol > ycol) ? xcol : ycol;
1196 max_col = (zcol > max_col) ? zcol : max_col;
1197 if(vcol)
1198 max_col = (vcol > max_col) ? vcol : max_col;
1199
1200 line = 0;
1201 first = TRUE;
1202
1203 G_verbose_message(_("Scanning data ..."));
1204
1205 while (0 != G_getl2(buff, BUFFSIZE - 1, fp)) {
1206 line++;
1207
1208 if ((buff[0] == '#') || (buff[0] == '\0')) {
1209 continue; /* line is a comment or blank */
1210 }
1211
1212 G_chop(buff); /* remove leading and trailing whitespace. unneded?? */
1213 tokens = G_tokenize(buff, fs);
1214 ntokens = G_number_of_tokens(tokens);
1215
1216 if ((ntokens < 3) || (max_col > ntokens)) {
1217 if (skipline) {
1218 G_warning(_("Not enough data columns. "
1219 "Incorrect delimiter or column number? "
1220 "Found the following character(s) in row %lu:\n[%s]"),
1221 line, buff);
1222 G_warning(_("Line ignored as requested"));
1223 continue; /* line is garbage */
1224 }
1225 else {
1226 G_fatal_error(_("Not enough data columns. "
1227 "Incorrect delimiter or column number? "
1228 "Found the following character(s) in row %lu:\n[%s]"),
1229 line, buff);
1230 }
1231 }
1232
1233 /* too slow?
1234 if ( G_projection() == PROJECTION_LL ) {
1235 G_scan_easting( tokens[xcol-1], &x, region.proj);
1236 G_scan_northing( tokens[ycol-1], &y, region.proj);
1237 }
1238 else {
1239 */
1240 if (1 != sscanf(tokens[xcol - 1], "%lf", &x))
1241 G_fatal_error(_("Bad x-coordinate line %lu column %d. <%s>"), line,
1242 xcol, tokens[xcol - 1]);
1243
1244 if (first) {
1245 min_x = x;
1246 max_x = x;
1247 }
1248 else {
1249 if (x < min_x)
1250 min_x = x;
1251 if (x > max_x)
1252 max_x = x;
1253 }
1254
1255 if (1 != sscanf(tokens[ycol - 1], "%lf", &y))
1256 G_fatal_error(_("Bad y-coordinate line %lu column %d. <%s>"), line,
1257 ycol, tokens[ycol - 1]);
1258
1259 if (first) {
1260 min_y = y;
1261 max_y = y;
1262 }
1263 else {
1264 if (y < min_y)
1265 min_y = y;
1266 if (y > max_y)
1267 max_y = y;
1268 }
1269
1270 if (1 != sscanf(tokens[zcol - 1], "%lf", &z))
1271 G_fatal_error(_("Bad z-coordinate line %lu column %d. <%s>"), line,
1272 zcol, tokens[zcol - 1]);
1273
1274 if (first) {
1275 min_z = z;
1276 max_z = z;
1277 if(!vcol)
1278 first = FALSE;
1279 }
1280 else {
1281 if (z < min_z)
1282 min_z = z;
1283 if (z > max_z)
1284 max_z = z;
1285 }
1286
1287 if(vcol) {
1288 if (1 != sscanf(tokens[vcol - 1], "%lf", &v))
1289 G_fatal_error(_("Bad data value line %lu column %d. <%s>"), line,
1290 vcol, tokens[vcol - 1]);
1291
1292 if (first) {
1293 min_v = v;
1294 max_v = v;
1295 first = FALSE;
1296 }
1297 else {
1298 if (v < min_v)
1299 min_v = v;
1300 if (v > max_v)
1301 max_v = v;
1302 }
1303 }
1304
1305 G_free_tokens(tokens);
1306 }
1307
1308 if (!shell_style) {
1309 fprintf(stderr, _("Range: min max\n"));
1310 fprintf(stdout, "x: %11.15g %11.15g\n", min_x, max_x);
1311 fprintf(stdout, "y: %11.15g %11.15g\n", min_y, max_y);
1312 fprintf(stdout, "z: %11.15g %11.15g\n", min_z * zscale, max_z * zscale);
1313 if(vcol)
1314 fprintf(stdout, "v: %11.15g %11.15g\n", min_v * vscale, max_v * vscale);
1315 }
1316 else {
1317 fprintf(stdout, "n=%.15g s=%.15g e=%.15g w=%.15g b=%.15g t=%.15g",
1318 max_y, min_y, max_x, min_x, min_z * zscale, max_z * zscale);
1319 if(vcol)
1320 fprintf(stdout, " min=%.15g max=%.15g\n", min_v * vscale,
1321 max_v * vscale);
1322 else
1323 fprintf(stdout, "\n");
1324 }
1325
1326 G_debug(1, "Processed %lu lines.", line);
1327 G_debug(1, "region template: g.region n=%.15g s=%.15g e=%.15g w=%.15g",
1328 max_y, min_y, max_x, min_x);
1329
1330 return 0;
1331 }
1332