1 
2 /**********************************************************************
3  *
4  * MODULE:       v.outlier
5  *
6  * AUTHOR(S):    Roberto Antolin
7  *
8  * PURPOSE:      Removal of data outliers
9  *
10  * COPYRIGHT:    (C) 2006 by Politecnico di Milano -
11  *			     Polo Regionale di Como
12  *
13  *               This program is free software under the
14  *               GNU General Public License (>=v2).
15  *               Read the file COPYING that comes with GRASS
16  *               for details.
17  *
18  **********************************************************************/
19 
20 /* INCLUDES */
21 #include <stdlib.h>
22 #include <string.h>
23 #include <math.h>
24 #include "outlier.h"
25 
26 /* GLOBAL VARIABLES */
27 int nsply, nsplx;
28 double stepN, stepE, Thres_Outlier;
29 
30 /*--------------------------------------------------------------------*/
main(int argc,char * argv[])31 int main(int argc, char *argv[])
32 {
33     /* Variables declarations */
34     int nsplx_adj, nsply_adj;
35     int nsubregion_col, nsubregion_row;
36     int subregion = 0, nsubregions = 0;
37     double N_extension, E_extension, edgeE, edgeN;
38     int dim_vect, nparameters, BW, npoints;
39     double mean, lambda;
40     const char *dvr, *db, *mapset;
41     char table_name[GNAME_MAX];
42     char xname[GNAME_MAX], xmapset[GMAPSET_MAX];
43 
44     int last_row, last_column, flag_auxiliar = FALSE;
45     int filter_mode;
46 
47     int *lineVect;
48     double *TN, *Q, *parVect;	/* Interpolating and least-square vectors */
49     double **N, **obsVect;	/* Interpolation and least-square matrix */
50 
51     /* Structs declarations */
52     struct Map_info In, Out, Outlier, Qgis;
53     struct Option *in_opt, *out_opt, *outlier_opt, *qgis_opt, *stepE_opt,
54 	*stepN_opt, *lambda_f_opt, *Thres_O_opt, *filter_opt;
55     struct Flag *spline_step_flag;
56     struct GModule *module;
57 
58     struct Reg_dimens dims;
59     struct Cell_head elaboration_reg, original_reg;
60     struct bound_box general_box, overlap_box;
61 
62     struct Point *observ;
63 
64     dbDriver *driver;
65 
66     /*----------------------------------------------------------------*/
67     /* Options declaration */
68     module = G_define_module();
69     G_add_keyword(_("vector"));
70     G_add_keyword(_("statistics"));
71     G_add_keyword(_("extract"));
72     G_add_keyword(_("select"));
73     G_add_keyword(_("filter"));
74     G_add_keyword(_("LIDAR"));
75     module->description = _("Removes outliers from vector point data.");
76 
77     spline_step_flag = G_define_flag();
78     spline_step_flag->key = 'e';
79     spline_step_flag->label = _("Estimate point density and distance");
80     spline_step_flag->description =
81 	_("Estimate point density and distance for the input vector points within the current region extends and quit");
82     spline_step_flag->suppress_required = YES;
83 
84     in_opt = G_define_standard_option(G_OPT_V_INPUT);
85 
86     out_opt = G_define_standard_option(G_OPT_V_OUTPUT);
87 
88     outlier_opt = G_define_standard_option(G_OPT_V_OUTPUT);
89     outlier_opt->key = "outlier";
90     outlier_opt->description = _("Name for output outlier vector map");
91 
92     qgis_opt = G_define_standard_option(G_OPT_V_OUTPUT);
93     qgis_opt->key = "qgis";
94     qgis_opt->required = NO;
95     qgis_opt->description = _("Name for vector map for visualization in QGIS");
96 
97     stepE_opt = G_define_option();
98     stepE_opt->key = "ew_step";
99     stepE_opt->type = TYPE_DOUBLE;
100     stepE_opt->required = NO;
101     stepE_opt->label =
102 	_("Length of each spline step in the east-west direction");
103     stepE_opt->description = _("Default: 10 * east-west resolution");
104     stepE_opt->guisection = _("Settings");
105 
106     stepN_opt = G_define_option();
107     stepN_opt->key = "ns_step";
108     stepN_opt->type = TYPE_DOUBLE;
109     stepN_opt->required = NO;
110     stepN_opt->label =
111 	_("Length of each spline step in the north-south direction");
112     stepN_opt->description = _("Default: 10 * north-south resolution");
113     stepN_opt->guisection = _("Settings");
114 
115     lambda_f_opt = G_define_option();
116     lambda_f_opt->key = "lambda";
117     lambda_f_opt->type = TYPE_DOUBLE;
118     lambda_f_opt->required = NO;
119     lambda_f_opt->description = _("Tykhonov regularization weight");
120     lambda_f_opt->answer = "0.1";
121     lambda_f_opt->guisection = _("Settings");
122 
123     Thres_O_opt = G_define_option();
124     Thres_O_opt->key = "threshold";
125     Thres_O_opt->type = TYPE_DOUBLE;
126     Thres_O_opt->required = NO;
127     Thres_O_opt->description = _("Threshold for the outliers");
128     Thres_O_opt->answer = "50";
129 
130     filter_opt = G_define_option();
131     filter_opt->key = "filter";
132     filter_opt->type = TYPE_STRING;
133     filter_opt->required = NO;
134     filter_opt->description = _("Filtering option");
135     filter_opt->options = "both,positive,negative";
136     filter_opt->answer = "both";
137 
138     G_gisinit(argv[0]);
139     G_option_requires(spline_step_flag, in_opt, NULL);
140 
141     /* Parsing */
142     if (G_parser(argc, argv))
143 	exit(EXIT_FAILURE);
144 
145     if (!(db = G_getenv_nofatal2("DB_DATABASE", G_VAR_MAPSET)))
146 	G_fatal_error(_("Unable to read name of database"));
147 
148     if (!(dvr = G_getenv_nofatal2("DB_DRIVER", G_VAR_MAPSET)))
149 	G_fatal_error(_("Unable to read name of driver"));
150 
151     G_get_set_window(&original_reg);
152     stepN = 10 * original_reg.ns_res;
153     if (stepN_opt->answer)
154 	stepN = atof(stepN_opt->answer);
155     stepE = 10 * original_reg.ew_res;
156     if (stepE_opt->answer)
157 	stepE = atof(stepE_opt->answer);
158     lambda = atof(lambda_f_opt->answer);
159     Thres_Outlier = atof(Thres_O_opt->answer);
160 
161     filter_mode = 0;
162     if (strcmp(filter_opt->answer, "positive") == 0)
163 	filter_mode = 1;
164     else if (strcmp(filter_opt->answer, "negative") == 0)
165 	filter_mode = -1;
166     P_set_outlier_fn(filter_mode);
167 
168     flag_auxiliar = FALSE;
169 
170     /* Checking vector names */
171     if (out_opt->answer)
172         Vect_check_input_output_name(in_opt->answer, out_opt->answer,
173                                      G_FATAL_EXIT);
174 
175     if ((mapset = G_find_vector2(in_opt->answer, "")) == NULL) {
176 	G_fatal_error(_("Vector map <%s> not found"), in_opt->answer);
177     }
178 
179     /* Setting auxiliary table's name */
180     if (out_opt->answer) {
181         if (G_name_is_fully_qualified(out_opt->answer, xname, xmapset)) {
182             sprintf(table_name, "%s_aux", xname);
183         }
184         else
185             sprintf(table_name, "%s_aux", out_opt->answer);
186     }
187 
188     /* Something went wrong in a previous v.outlier execution */
189     if (db_table_exists(dvr, db, table_name)) {
190 	/* Start driver and open db */
191 	driver = db_start_driver_open_database(dvr, db);
192 	if (driver == NULL)
193 	    G_fatal_error(_("No database connection for driver <%s> is defined. Run db.connect."),
194 			  dvr);
195         db_set_error_handler_driver(driver);
196 
197 	if (P_Drop_Aux_Table(driver, table_name) != DB_OK)
198 	    G_fatal_error(_("Old auxiliary table could not be dropped"));
199 	db_close_database_shutdown_driver(driver);
200     }
201 
202     /* Open input vector */
203     Vect_set_open_level(1);	/* WITHOUT TOPOLOGY */
204     if (1 > Vect_open_old(&In, in_opt->answer, mapset))
205 	G_fatal_error(_("Unable to open vector map <%s> at the topological level"),
206 		      in_opt->answer);
207 
208     /* Input vector must be 3D */
209     if (!Vect_is_3d(&In))
210 	G_fatal_error(_("Input vector map <%s> is not 3D!"), in_opt->answer);
211 
212     /* Estimate point density and mean distance for current region */
213     if (spline_step_flag->answer) {
214 	double dens, dist;
215 	if (P_estimate_splinestep(&In, &dens, &dist) == 0) {
216 	    G_message("Estimated point density: %.4g", dens);
217 	    G_message("Estimated mean distance between points: %.4g", dist);
218 	}
219 	else
220 	    G_warning(_("No points in current region!"));
221 
222 	Vect_close(&In);
223 	exit(EXIT_SUCCESS);
224     }
225 
226     /* Open output vector */
227     if (qgis_opt->answer)
228 	if (0 > Vect_open_new(&Qgis, qgis_opt->answer, WITHOUT_Z))
229 	    G_fatal_error(_("Unable to create vector map <%s>"),
230 			  qgis_opt->answer);
231 
232     if (0 > Vect_open_new(&Out, out_opt->answer, WITH_Z)) {
233 	Vect_close(&Qgis);
234 	G_fatal_error(_("Unable to create vector map <%s>"), out_opt->answer);
235     }
236 
237     if (0 > Vect_open_new(&Outlier, outlier_opt->answer, WITH_Z)) {
238 	Vect_close(&Out);
239 	Vect_close(&Qgis);
240 	G_fatal_error(_("Unable to create vector map <%s>"), out_opt->answer);
241     }
242 
243     /* Copy vector Head File */
244     Vect_copy_head_data(&In, &Out);
245     Vect_hist_copy(&In, &Out);
246     Vect_hist_command(&Out);
247 
248     Vect_copy_head_data(&In, &Outlier);
249     Vect_hist_copy(&In, &Outlier);
250     Vect_hist_command(&Outlier);
251 
252     if (qgis_opt->answer) {
253 	Vect_copy_head_data(&In, &Qgis);
254 	Vect_hist_copy(&In, &Qgis);
255 	Vect_hist_command(&Qgis);
256     }
257 
258     /* Open driver and database */
259     driver = db_start_driver_open_database(dvr, db);
260     if (driver == NULL)
261 	G_fatal_error(_("No database connection for driver <%s> is defined. Run db.connect."),
262 		      dvr);
263     db_set_error_handler_driver(driver);
264 
265     /* Create auxiliary table */
266     if ((flag_auxiliar =
267 	 P_Create_Aux2_Table(driver, table_name)) == FALSE)
268 	G_fatal_error(_("It was impossible to create <%s> table."), table_name);
269 
270     db_create_index2(driver, table_name, "ID");
271     /* sqlite likes that ??? */
272     db_close_database_shutdown_driver(driver);
273     driver = db_start_driver_open_database(dvr, db);
274 
275     /* Setting regions and boxes */
276     G_get_set_window(&elaboration_reg);
277     Vect_region_box(&elaboration_reg, &overlap_box);
278     Vect_region_box(&elaboration_reg, &general_box);
279 
280     /*------------------------------------------------------------------
281       | Subdividing and working with tiles:
282       | Each original region will be divided into several subregions.
283       | Each one will be overlapped by its neighbouring subregions.
284       | The overlapping is calculated as a fixed OVERLAP_SIZE times
285       | the largest spline step plus 2 * edge
286       ----------------------------------------------------------------*/
287 
288     /* Fixing parameters of the elaboration region */
289     P_zero_dim(&dims);		/* Set dim struct to zero */
290 
291     nsplx_adj = NSPLX_MAX;
292     nsply_adj = NSPLY_MAX;
293     if (stepN > stepE)
294 	dims.overlap = OVERLAP_SIZE * stepN;
295     else
296 	dims.overlap = OVERLAP_SIZE * stepE;
297     P_get_edge(P_BILINEAR, &dims, stepE, stepN);
298     P_set_dim(&dims, stepE, stepN, &nsplx_adj, &nsply_adj);
299 
300     G_verbose_message(_("Adjusted EW splines %d"), nsplx_adj);
301     G_verbose_message(_("Adjusted NS splines %d"), nsply_adj);
302 
303     /* calculate number of subregions */
304     edgeE = dims.ew_size - dims.overlap - 2 * dims.edge_v;
305     edgeN = dims.sn_size - dims.overlap - 2 * dims.edge_h;
306 
307     N_extension = original_reg.north - original_reg.south;
308     E_extension = original_reg.east - original_reg.west;
309 
310     nsubregion_col = ceil(E_extension / edgeE) + 0.5;
311     nsubregion_row = ceil(N_extension / edgeN) + 0.5;
312 
313     if (nsubregion_col < 0)
314 	nsubregion_col = 0;
315     if (nsubregion_row < 0)
316 	nsubregion_row = 0;
317 
318     nsubregions = nsubregion_row * nsubregion_col;
319 
320     elaboration_reg.south = original_reg.north;
321     last_row = FALSE;
322 
323     while (last_row == FALSE) {	/* For each row */
324 
325 	P_set_regions(&elaboration_reg, &general_box, &overlap_box, dims,
326 		      GENERAL_ROW);
327 
328 	if (elaboration_reg.north > original_reg.north) {	/* First row */
329 
330 	    P_set_regions(&elaboration_reg, &general_box, &overlap_box, dims,
331 			  FIRST_ROW);
332 	}
333 
334 	if (elaboration_reg.south <= original_reg.south) {	/* Last row */
335 
336 	    P_set_regions(&elaboration_reg, &general_box, &overlap_box, dims,
337 			  LAST_ROW);
338 	    last_row = TRUE;
339 	}
340 
341 	nsply =
342 	    ceil((elaboration_reg.north -
343 		  elaboration_reg.south) / stepN) + 0.5;
344 	/*
345 	if (nsply > NSPLY_MAX)
346 	    nsply = NSPLY_MAX;
347 	*/
348 	G_debug(1, "nsply = %d", nsply);
349 
350 	elaboration_reg.east = original_reg.west;
351 	last_column = FALSE;
352 
353 	while (last_column == FALSE) {	/* For each column */
354 
355 	    subregion++;
356 	    if (nsubregions > 1)
357 		G_message(_("Processing subregion %d of %d..."), subregion, nsubregions);
358 
359 	    P_set_regions(&elaboration_reg, &general_box, &overlap_box, dims,
360 			  GENERAL_COLUMN);
361 
362 	    if (elaboration_reg.west < original_reg.west) {	/* First column */
363 
364 		P_set_regions(&elaboration_reg, &general_box, &overlap_box,
365 			      dims, FIRST_COLUMN);
366 	    }
367 
368 	    if (elaboration_reg.east >= original_reg.east) {	/* Last column */
369 
370 		P_set_regions(&elaboration_reg, &general_box, &overlap_box,
371 			      dims, LAST_COLUMN);
372 		last_column = TRUE;
373 	    }
374 	    nsplx =
375 		ceil((elaboration_reg.east -
376 		      elaboration_reg.west) / stepE) + 0.5;
377 	    /*
378 	    if (nsplx > NSPLX_MAX)
379 		nsplx = NSPLX_MAX;
380 	    */
381 	    G_debug(1, "nsplx = %d", nsplx);
382 
383 	    /*Setting the active region */
384 	    dim_vect = nsplx * nsply;
385 	    observ =
386 		P_Read_Vector_Region_Map(&In, &elaboration_reg, &npoints,
387 					 dim_vect, 1);
388 
389 	    if (npoints > 0) {	/* If there is any point falling into elaboration_reg... */
390 		int i;
391 
392 		nparameters = nsplx * nsply;
393 
394 		/* Mean calculation */
395 		mean = P_Mean_Calc(&elaboration_reg, observ, npoints);
396 
397 		/* Least Squares system */
398 		G_debug(1, "Allocation memory for bilinear interpolation");
399 		BW = P_get_BandWidth(P_BILINEAR, nsply);	/* Bilinear interpolation */
400 		N = G_alloc_matrix(nparameters, BW);	/* Normal matrix */
401 		TN = G_alloc_vector(nparameters);	/* vector */
402 		parVect = G_alloc_vector(nparameters);	/* Bicubic parameters vector */
403 		obsVect = G_alloc_matrix(npoints, 3);	/* Observation vector */
404 		Q = G_alloc_vector(npoints);	/* "a priori" var-cov matrix */
405 		lineVect = G_alloc_ivector(npoints);
406 
407 		/* Setting obsVect vector & Q matrix */
408 		for (i = 0; i < npoints; i++) {
409 		    obsVect[i][0] = observ[i].coordX;
410 		    obsVect[i][1] = observ[i].coordY;
411 		    obsVect[i][2] = observ[i].coordZ - mean;
412 		    lineVect[i] = observ[i].lineID;
413 		    Q[i] = 1;	/* Q=I */
414 		}
415 
416 		G_free(observ);
417 
418 		G_verbose_message(_("Bilinear interpolation"));
419 		normalDefBilin(N, TN, Q, obsVect, stepE, stepN, nsplx,
420 			       nsply, elaboration_reg.west,
421 			       elaboration_reg.south, npoints, nparameters,
422 			       BW);
423 		nCorrectGrad(N, lambda, nsplx, nsply, stepE, stepN);
424 		G_math_solver_cholesky_sband(N, parVect, TN, nparameters, BW);
425 
426 		G_free_matrix(N);
427 		G_free_vector(TN);
428 		G_free_vector(Q);
429 
430 		G_verbose_message(_("Outlier detection"));
431 		if (qgis_opt->answer)
432 		    P_Outlier(&Out, &Outlier, &Qgis, elaboration_reg,
433 			      general_box, overlap_box, obsVect, parVect,
434 			      mean, dims.overlap, lineVect, npoints,
435 			      driver, table_name);
436 		else
437 		    P_Outlier(&Out, &Outlier, NULL, elaboration_reg,
438 			      general_box, overlap_box, obsVect, parVect,
439 			      mean, dims.overlap, lineVect, npoints,
440 			      driver, table_name);
441 
442 
443 		G_free_vector(parVect);
444 		G_free_matrix(obsVect);
445 		G_free_ivector(lineVect);
446 
447 	    }			/*! END IF; npoints > 0 */
448 	    else {
449 		G_free(observ);
450 		G_warning(_("No data within this subregion. "
451 			    "Consider increasing spline step values."));
452 	    }
453 	}			/*! END WHILE; last_column = TRUE */
454     }				/*! END WHILE; last_row = TRUE */
455 
456     /* Drop auxiliary table */
457     if (npoints > 0) {
458 	G_debug(1, "%s: Dropping <%s>", argv[0], table_name);
459 	if (P_Drop_Aux_Table(driver, table_name) != DB_OK)
460 	    G_fatal_error(_("Auxiliary table could not be dropped"));
461     }
462 
463     db_close_database_shutdown_driver(driver);
464 
465     Vect_close(&In);
466     Vect_close(&Out);
467     Vect_close(&Outlier);
468     if (qgis_opt->answer) {
469 	Vect_build(&Qgis);
470 	Vect_close(&Qgis);
471     }
472 
473     G_done_msg(" ");
474 
475     exit(EXIT_SUCCESS);
476 }				/*END MAIN */
477