1 
2 /****************************************************************
3  *
4  * MODULE:     v.extract
5  *
6  * AUTHOR(S):  R.L.Glenn , Soil Conservation Service, USDA
7  *             Updated for 5.7 by Radim Blazek
8  *             Updated for 7.0 by Martin Landa <landa.martin gmail.com>
9  *
10  * PURPOSE:    Selects vector features from an existing vector map and
11  *             creates a new vector map containing only the selected
12  *             features.
13  *
14  * COPYRIGHT:  (C) 2002-2009, 2011 by the GRASS Development Team
15  *
16  *             This program is free software under the GNU General
17  *             Public License (>=v2). Read the file COPYING that
18  *             comes with GRASS for details.
19  *
20  * TODO:       - fix white space problems for file= option
21  ****************************************************************/
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <math.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29 
30 #include <grass/gis.h>
31 #include <grass/vector.h>
32 #include <grass/dbmi.h>
33 #include <grass/gmath.h>
34 #include <grass/glocale.h>
35 
36 #include "local_proto.h"
37 
38 static int *cat_array, cat_count, cat_size;
39 
40 static int scan_cats(char *, int *, int *);
41 static void add_cat(int);
42 
main(int argc,char ** argv)43 int main(int argc, char **argv)
44 {
45     int i, new_cat, type, ncats, *cats, c, native;
46     int field, dissolve, x, y, type_only;
47     char buffr[1024], text[80];
48     char *input, *output;
49 
50     FILE *in;
51 
52     struct GModule *module;
53     struct {
54 	struct Option *input, *output, *file, *new, *type, *list,
55 	    *field, *where, *nrand, *d_key;
56     } opt;
57     struct {
58 	struct Flag *t, *d, *r;
59     } flag;
60 
61     struct Map_info In, Out;
62     struct field_info *Fi;
63 
64     dbDriver *driver;
65 
66     struct Cat_index *ci;
67 
68     int ucat_count, *ucat_array, prnd, seed, nrandom, nfeatures;
69     char *dissolve_key;
70 
71     Fi = NULL;
72     ucat_array = NULL;
73 
74     G_gisinit(argv[0]);
75 
76     /* set up the options and flags for the command line parser */
77     module = G_define_module();
78     G_add_keyword(_("vector"));
79     G_add_keyword(_("extract"));
80     G_add_keyword(_("select"));
81     G_add_keyword(_("dissolve"));
82     G_add_keyword(_("random"));
83     module->description =
84 	_("Selects vector features from an existing vector map and "
85 	  "creates a new vector map containing only the selected features.");
86 
87     flag.d = G_define_flag();
88     flag.d->key = 'd';
89     flag.d->description = _("Dissolve common boundaries (default is no)");
90 
91     flag.t = G_define_flag();
92     flag.t->key = 't';
93     flag.t->description = _("Do not copy attributes (see also 'new' parameter)");
94     flag.t->guisection = _("Attributes");
95 
96     flag.r = G_define_flag();
97     flag.r->key = 'r';
98     flag.r->description = _("Reverse selection");
99     flag.r->guisection = _("Selection");
100 
101     opt.input = G_define_standard_option(G_OPT_V_INPUT);
102 
103     opt.field = G_define_standard_option(G_OPT_V_FIELD_ALL);
104     opt.field->answer = "1";
105     opt.field->guisection = _("Selection");
106 
107     opt.type = G_define_standard_option(G_OPT_V_TYPE);
108     opt.type->answer = "point,line,boundary,centroid,area,face";
109     opt.type->options = "point,line,boundary,centroid,area,face";
110     opt.type->label = _("Types to be extracted");
111     opt.type->guisection = _("Selection");
112 
113     opt.list = G_define_standard_option(G_OPT_V_CATS);
114     opt.list->guisection = _("Selection");
115 
116     opt.where = G_define_standard_option(G_OPT_DB_WHERE);
117     opt.where->guisection = _("Selection");
118 
119     opt.output = G_define_standard_option(G_OPT_V_OUTPUT);
120 
121     opt.file = G_define_standard_option(G_OPT_F_INPUT);
122     opt.file->key = "file";
123     opt.file->required = NO;
124     opt.file->label =
125 	_("Input text file with category numbers/number ranges to be extracted");
126     opt.file->description = _("If '-' given reads from standard input");
127     opt.file->guisection = _("Selection");
128 
129     opt.nrand = G_define_option();
130     opt.nrand->key = "random";
131     opt.nrand->type = TYPE_INTEGER;
132     opt.nrand->required = NO;
133     opt.nrand->label =
134 	_("Number of random categories matching vector objects to extract");
135     opt.nrand->description =
136 	_("Number must be smaller than unique cat count in layer");
137     opt.nrand->guisection = _("Selection");
138 
139     opt.new = G_define_option();
140     opt.new->key = "new";
141     opt.new->type = TYPE_INTEGER;
142     opt.new->required = NO;
143     opt.new->answer = "-1";
144     opt.new->label = _("Desired new category value "
145 		       "(enter -1 to keep original categories)");
146     opt.new->description = _("If new >= 0, attributes is not copied");
147     opt.new->guisection = _("Attributes");
148 
149     opt.d_key = G_define_standard_option(G_OPT_DB_COLUMN);
150     opt.d_key->key = "dissolve_column";
151     opt.d_key->label = _("Name of attribute column for dissolving areas");
152     opt.d_key->description = _("Preserves category values");
153     opt.d_key->required = NO;
154 
155     if (G_parser(argc, argv))
156 	exit(EXIT_FAILURE);
157 
158     /* start checking options and flags */
159     c = 0;
160     if (opt.file->answer != NULL)
161 	c++;
162     if (opt.list->answers != NULL)
163 	c++;
164     if (opt.where->answer != NULL)
165 	c++;
166     if (opt.nrand->answer != NULL)
167 	c++;
168     if (c > 1)
169 	G_fatal_error(_("Options <%s>, <%s>, <%s> and <%s> options are exclusive. "
170 			"Please specify only one of them."),
171 		      opt.list->key, opt.file->key, opt.where->key,
172 		      opt.nrand->key);
173 
174     type_only = FALSE;
175     if (!opt.list->answers && !opt.file->answer && !opt.where->answer &&
176 	!opt.nrand->answer) {
177 	type_only = TRUE;
178     }
179 
180     input = opt.input->answer;
181     output = opt.output->answer;
182     Vect_check_input_output_name(input, output,
183 				 G_FATAL_EXIT);
184 
185     if (!opt.new->answer)
186 	new_cat = 0;
187     else
188 	new_cat = atoi(opt.new->answer);
189 
190     /* Do initial read of input file */
191     Vect_set_open_level(2); /* topology required */
192 
193     if (Vect_open_old2(&In, input, "", opt.field->answer) < 0)
194 	G_fatal_error(_("Unable to open vector map <%s>"), input);
195 
196     field = Vect_get_field_number(&In, opt.field->answer);
197 
198     type = Vect_option_to_types(opt.type);
199     if (type & GV_AREA) {
200 	type |= GV_CENTROID;
201     }
202 
203     dissolve_key = NULL;
204     if (flag.d->answer &&
205         ((type & GV_AREA) || ((type & GV_CENTROID) && (type & GV_BOUNDARY)))) {
206 	dissolve = TRUE;
207 	if (field > 0 && opt.d_key->answer) {
208 	    int ncols, ret;
209 	    dbTable *Table;
210 	    dbColumn *Col;
211 	    dbString tabname;
212 
213 	    dissolve_key = opt.d_key->answer;
214 
215 	    Fi = Vect_get_field(&In, field);
216 	    if (!Fi) {
217 		G_fatal_error(_("Database connection not defined for layer <%s>"),
218 			      opt.field->answer);
219 	    }
220 
221 	    G_verbose_message(_("Searching for column <%s> in table <%s>..."),
222 	                      dissolve_key, Fi->table);
223 
224 	    driver = db_start_driver_open_database(Fi->driver, Fi->database);
225 	    if (driver == NULL)
226 		G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
227 			      Fi->database, Fi->driver);
228 
229 	    db_init_string(&tabname);
230 	    db_set_string(&tabname, Fi->table);
231 
232 	    if (db_describe_table(driver, &tabname, &Table) != DB_OK) {
233 		G_fatal_error(_("Unable to describe table <%s>"), Fi->table);
234 	    }
235 
236 	    ret = 0;
237 
238 	    ncols = db_get_table_number_of_columns(Table);
239 	    G_debug(3, "ncol = %d", ncols);
240 
241 	    for (i = 0; i < ncols; i++) {
242 		Col = db_get_table_column(Table, i);
243 		if (G_strcasecmp(db_get_column_name(Col), dissolve_key) == 0) {
244 		    ret = 1;
245 		    break;
246 		}
247 	    }
248 	    db_free_table(Table);
249 	    db_free_string(&tabname);
250 	    db_close_database_shutdown_driver(driver);
251 
252 	    if (!ret)
253 		G_fatal_error(_("Column <%s> does not exist for layer %d"),
254 		              dissolve_key, field);
255 	}
256     }
257     else
258 	dissolve = FALSE;
259 
260     /* Read categoy list */
261     cat_count = 0;
262     if (opt.list->answer != NULL) {
263 	/* no file of categories to read, process cat list */
264 	/* first check for valid list */
265 	for (i = 0; opt.list->answers[i]; i++) {
266 	    G_debug(2, "catlist item: %s", opt.list->answers[i]);
267 	    if (!scan_cats(opt.list->answers[i], &x, &y))
268 		G_fatal_error(_("Category value in '%s' not valid"),
269 			      opt.list->answers[i]);
270 	}
271 
272 	/* valid list, put into cat value array */
273 	for (i = 0; opt.list->answers[i]; i++) {
274 	    scan_cats(opt.list->answers[i], &x, &y);
275 	    while (x <= y)
276 		add_cat(x++);
277 	}
278     }
279     else if (opt.file->answer != NULL) {	/* got a file of category numbers */
280 	if (strcmp(opt.file->answer, "-") == 0) {
281 	    in = stdin;
282 	}
283 	else {
284 	    G_verbose_message(_("Process file <%s> for category numbers..."),
285 			      opt.file->answer);
286 
287 	    /* open input file */
288 	    if ((in = fopen(opt.file->answer, "r")) == NULL)
289 		G_fatal_error(_("Unable to open specified file <%s>"),
290 			      opt.file->answer);
291 	}
292 
293 	while (TRUE) {
294 	    if (!fgets(buffr, 39, in))
295 		break;
296 	    /* eliminate some white space, we accept numbers and dashes only */
297 	    G_chop(buffr);
298 	    sscanf(buffr, "%[-0-9]", text);
299 	    if (strlen(text) == 0)
300 		G_warning(_("Ignored text entry: %s"), buffr);
301 
302 	    scan_cats(text, &x, &y);
303 	    /* two BUGS here: - fgets stops if white space appears
304 	       - if white space appears, number is not read correctly */
305 	    while (x <= y && x >= 0 && y >= 0)
306 		add_cat(x++);
307 	}
308 
309 	if (strcmp(opt.file->answer, "-") != 0)
310 	    fclose(in);
311     }
312     else if (opt.where->answer != NULL) {
313 	Fi = Vect_get_field(&In, field);
314 	if (!Fi) {
315 	    G_fatal_error(_("Database connection not defined for layer <%s>"),
316 			  opt.field->answer);
317 	}
318 
319 	G_verbose_message(_("Loading categories from table <%s>..."), Fi->table);
320 
321 	driver = db_start_driver_open_database(Fi->driver, Fi->database);
322 	if (driver == NULL)
323 	    G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
324 			  Fi->database, Fi->driver);
325 
326 	ncats = db_select_int(driver, Fi->table, Fi->key, opt.where->answer,
327 			      &cats);
328 	if (ncats == -1)
329 		G_fatal_error(_("Unable select records from table <%s>"),
330 			      Fi->table);
331 	G_verbose_message(n_("%d category loaded",
332                              "%d categories loaded",
333                              ncats), ncats);
334 
335 	db_close_database(driver);
336 	db_shutdown_driver(driver);
337 
338 	for (i = 0; i < ncats; i++)
339 	    add_cat(cats[i]);
340 	if (ncats >= 0)
341 	    G_free(cats);
342     }
343     else if (opt.nrand->answer != NULL) { /* Generate random category list */
344 	/* We operate on layer's CAT's and thus valid layer is required */
345 	if (Vect_cidx_get_field_index(&In, field) < 0)
346 	    G_fatal_error(_("This map has no categories attached. "
347 			    "Use v.category to attach categories to "
348 			    "this vector map."));
349 
350 	/* Don't do any processing, if user input is wrong */
351 	nrandom = atoi(opt.nrand->answer);
352 	if (nrandom < 1)
353 	    G_fatal_error(_("Please specify random number larger than 0"));
354 
355 	nfeatures = Vect_cidx_get_type_count(&In, field, type);
356 	if (nrandom >= nfeatures)
357 	    G_fatal_error(_("Random category count must be smaller than feature count. "
358 			    "There are only %d features of type(s): %s"),
359 			  nfeatures, opt.type->answer);
360 
361 	/* Let's create an array of uniq CAT values
362 	   According to Vlib/build.c, cidx should be already sorted by dig_cidx_sort() */
363 	ci = &(In.plus.cidx[Vect_cidx_get_field_index(&In, field)]);
364 	ucat_count = 0;
365 	for (c = 0; c < ci->n_cats; c++) {
366 	    /* Bitwise AND compares ci feature type with user's requested types */
367 	    if (ci->cat[c][1] & type) {
368 		/* Don't do anything if such value already exists */
369 		if (ucat_count > 0 &&
370 		    ucat_array[ucat_count - 1] == ci->cat[c][0])
371 		    continue;
372 		ucat_array =
373 		    G_realloc(ucat_array, (ucat_count + 1) * sizeof(int));
374 		ucat_array[ucat_count] = ci->cat[c][0];
375 		ucat_count++;
376 	    }
377 	}
378 
379 	if (nrandom >= ucat_count)
380 	    G_fatal_error(_("Random category count is larger or equal to "
381 			    "uniq <%s> feature category count %d"),
382 			  opt.type->answer, ucat_count);
383 
384 	/* Initialise random number generator */
385 	/* FIXME - allow seed to be specified for repeatability */
386 	G_math_srand_auto();
387 
388 	/* Fill cat_array with list of valid random numbers */
389 	while (cat_count < nrandom) {
390 	    /* Random number in range from 0 to largest CAT value */
391 	    prnd = (int) floor(G_math_rand() *
392 			       (ucat_array[ucat_count - 1] + 1));
393 	    qsort(cat_array, cat_count, sizeof(int), cmp);
394 	    /* Check if generated number isn't already in
395 	       final list and is in list of existing CATs */
396 	    if (bsearch(&prnd, cat_array, cat_count, sizeof(int), cmp) == NULL
397 		&& bsearch(&prnd, ucat_array, ucat_count, sizeof(int),
398 			   cmp) != NULL)
399 		add_cat(prnd);
400 	}
401 	G_free(ucat_array);
402 	qsort(cat_array, cat_count, sizeof(int), cmp);
403     }
404 
405     if (Vect_open_new(&Out, output, Vect_is_3d(&In)) < 0)
406 	G_fatal_error(_("Unable to create vector map <%s>"), output);
407 
408     Vect_hist_copy(&In, &Out);
409     Vect_hist_command(&Out);
410 
411     /* Read and write header info */
412     Vect_copy_head_data(&In, &Out);
413 
414     G_message(_("Extracting features..."));
415 
416     native = Vect_maptype(&Out) == GV_FORMAT_NATIVE;
417     if (!flag.t->answer && !native) {
418 	/* Copy attributes for OGR output */
419 	Vect_copy_map_dblinks(&In, &Out, TRUE);
420     }
421 
422     extract_line(cat_count, cat_array, &In, &Out, new_cat, type,
423 		 dissolve, dissolve_key, field, type_only, flag.r->answer ? 1 : 0);
424 
425     Vect_build(&Out);
426 
427     /* Copy tables */
428     if (!flag.t->answer && native) {
429 	copy_tabs(&In, field, new_cat, &Out);
430     }
431 
432     Vect_close(&In);
433 
434     /* remove duplicate centroids */
435     if (dissolve) {
436 	int line, nlines, ltype, area;
437 
438 	G_message(_("Removing duplicate centroids..."));
439 	nlines = Vect_get_num_lines(&Out);
440 	for (line = 1; line <= nlines; line++) {
441 	    if (!Vect_line_alive(&Out, line))
442 		continue;	/* should not happen */
443 
444 	    ltype = Vect_read_line(&Out, NULL, NULL, line);
445 	    if (!(ltype & GV_CENTROID)) {
446 		continue;
447 	    }
448 	    area = Vect_get_centroid_area(&Out, line);
449 
450 	    if (area < 0) {
451 		Vect_delete_line(&Out, line);
452 	    }
453 	}
454 	Vect_build_partial(&Out, GV_BUILD_NONE);
455 	Vect_build(&Out);
456     }
457 
458     Vect_close(&Out);
459 
460     exit(EXIT_SUCCESS);
461 }
462 
add_cat(int x)463 void add_cat(int x)
464 {
465     G_debug(2, "add_cat %d", x);
466 
467     if (cat_count >= cat_size) {
468 	cat_size = (cat_size < 1000) ? 1000 : cat_size * 2;
469 	cat_array = G_realloc(cat_array, cat_size * sizeof(int));
470     }
471 
472     cat_array[cat_count++] = x;
473 }
474 
scan_cats(char * s,int * x,int * y)475 int scan_cats(char *s, int *x, int *y)
476 {
477     char dummy[2];
478 
479     if (strlen(s) == 0) {	/* error */
480 	*y = *x = -1;
481 	return 1;
482     }
483 
484     *dummy = 0;
485     if (sscanf(s, "%d-%d%1s", x, y, dummy) == 2)
486 	return (*dummy == 0 && *x <= *y);
487 
488     *dummy = 0;
489     if (sscanf(s, "%d%1s", x, dummy) == 1 && *dummy == 0) {
490 	*y = *x;
491 	return 1;
492     }
493     return 0;
494 }
495