1
2 /****************************************************************
3 *
4 * MODULE: v.extract
5 *
6 * AUTHOR(S): R.L.Glenn , Soil Conservation Service, USDA
7 * Updated for 5.7 by Radim Blazek
8 * Updated for 7.0 by Martin Landa <landa.martin gmail.com>
9 *
10 * PURPOSE: Selects vector features from an existing vector map and
11 * creates a new vector map containing only the selected
12 * features.
13 *
14 * COPYRIGHT: (C) 2002-2009, 2011 by the GRASS Development Team
15 *
16 * This program is free software under the GNU General
17 * Public License (>=v2). Read the file COPYING that
18 * comes with GRASS for details.
19 *
20 * TODO: - fix white space problems for file= option
21 ****************************************************************/
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <math.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29
30 #include <grass/gis.h>
31 #include <grass/vector.h>
32 #include <grass/dbmi.h>
33 #include <grass/gmath.h>
34 #include <grass/glocale.h>
35
36 #include "local_proto.h"
37
38 static int *cat_array, cat_count, cat_size;
39
40 static int scan_cats(char *, int *, int *);
41 static void add_cat(int);
42
main(int argc,char ** argv)43 int main(int argc, char **argv)
44 {
45 int i, new_cat, type, ncats, *cats, c, native;
46 int field, dissolve, x, y, type_only;
47 char buffr[1024], text[80];
48 char *input, *output;
49
50 FILE *in;
51
52 struct GModule *module;
53 struct {
54 struct Option *input, *output, *file, *new, *type, *list,
55 *field, *where, *nrand, *d_key;
56 } opt;
57 struct {
58 struct Flag *t, *d, *r;
59 } flag;
60
61 struct Map_info In, Out;
62 struct field_info *Fi;
63
64 dbDriver *driver;
65
66 struct Cat_index *ci;
67
68 int ucat_count, *ucat_array, prnd, seed, nrandom, nfeatures;
69 char *dissolve_key;
70
71 Fi = NULL;
72 ucat_array = NULL;
73
74 G_gisinit(argv[0]);
75
76 /* set up the options and flags for the command line parser */
77 module = G_define_module();
78 G_add_keyword(_("vector"));
79 G_add_keyword(_("extract"));
80 G_add_keyword(_("select"));
81 G_add_keyword(_("dissolve"));
82 G_add_keyword(_("random"));
83 module->description =
84 _("Selects vector features from an existing vector map and "
85 "creates a new vector map containing only the selected features.");
86
87 flag.d = G_define_flag();
88 flag.d->key = 'd';
89 flag.d->description = _("Dissolve common boundaries (default is no)");
90
91 flag.t = G_define_flag();
92 flag.t->key = 't';
93 flag.t->description = _("Do not copy attributes (see also 'new' parameter)");
94 flag.t->guisection = _("Attributes");
95
96 flag.r = G_define_flag();
97 flag.r->key = 'r';
98 flag.r->description = _("Reverse selection");
99 flag.r->guisection = _("Selection");
100
101 opt.input = G_define_standard_option(G_OPT_V_INPUT);
102
103 opt.field = G_define_standard_option(G_OPT_V_FIELD_ALL);
104 opt.field->answer = "1";
105 opt.field->guisection = _("Selection");
106
107 opt.type = G_define_standard_option(G_OPT_V_TYPE);
108 opt.type->answer = "point,line,boundary,centroid,area,face";
109 opt.type->options = "point,line,boundary,centroid,area,face";
110 opt.type->label = _("Types to be extracted");
111 opt.type->guisection = _("Selection");
112
113 opt.list = G_define_standard_option(G_OPT_V_CATS);
114 opt.list->guisection = _("Selection");
115
116 opt.where = G_define_standard_option(G_OPT_DB_WHERE);
117 opt.where->guisection = _("Selection");
118
119 opt.output = G_define_standard_option(G_OPT_V_OUTPUT);
120
121 opt.file = G_define_standard_option(G_OPT_F_INPUT);
122 opt.file->key = "file";
123 opt.file->required = NO;
124 opt.file->label =
125 _("Input text file with category numbers/number ranges to be extracted");
126 opt.file->description = _("If '-' given reads from standard input");
127 opt.file->guisection = _("Selection");
128
129 opt.nrand = G_define_option();
130 opt.nrand->key = "random";
131 opt.nrand->type = TYPE_INTEGER;
132 opt.nrand->required = NO;
133 opt.nrand->label =
134 _("Number of random categories matching vector objects to extract");
135 opt.nrand->description =
136 _("Number must be smaller than unique cat count in layer");
137 opt.nrand->guisection = _("Selection");
138
139 opt.new = G_define_option();
140 opt.new->key = "new";
141 opt.new->type = TYPE_INTEGER;
142 opt.new->required = NO;
143 opt.new->answer = "-1";
144 opt.new->label = _("Desired new category value "
145 "(enter -1 to keep original categories)");
146 opt.new->description = _("If new >= 0, attributes is not copied");
147 opt.new->guisection = _("Attributes");
148
149 opt.d_key = G_define_standard_option(G_OPT_DB_COLUMN);
150 opt.d_key->key = "dissolve_column";
151 opt.d_key->label = _("Name of attribute column for dissolving areas");
152 opt.d_key->description = _("Preserves category values");
153 opt.d_key->required = NO;
154
155 if (G_parser(argc, argv))
156 exit(EXIT_FAILURE);
157
158 /* start checking options and flags */
159 c = 0;
160 if (opt.file->answer != NULL)
161 c++;
162 if (opt.list->answers != NULL)
163 c++;
164 if (opt.where->answer != NULL)
165 c++;
166 if (opt.nrand->answer != NULL)
167 c++;
168 if (c > 1)
169 G_fatal_error(_("Options <%s>, <%s>, <%s> and <%s> options are exclusive. "
170 "Please specify only one of them."),
171 opt.list->key, opt.file->key, opt.where->key,
172 opt.nrand->key);
173
174 type_only = FALSE;
175 if (!opt.list->answers && !opt.file->answer && !opt.where->answer &&
176 !opt.nrand->answer) {
177 type_only = TRUE;
178 }
179
180 input = opt.input->answer;
181 output = opt.output->answer;
182 Vect_check_input_output_name(input, output,
183 G_FATAL_EXIT);
184
185 if (!opt.new->answer)
186 new_cat = 0;
187 else
188 new_cat = atoi(opt.new->answer);
189
190 /* Do initial read of input file */
191 Vect_set_open_level(2); /* topology required */
192
193 if (Vect_open_old2(&In, input, "", opt.field->answer) < 0)
194 G_fatal_error(_("Unable to open vector map <%s>"), input);
195
196 field = Vect_get_field_number(&In, opt.field->answer);
197
198 type = Vect_option_to_types(opt.type);
199 if (type & GV_AREA) {
200 type |= GV_CENTROID;
201 }
202
203 dissolve_key = NULL;
204 if (flag.d->answer &&
205 ((type & GV_AREA) || ((type & GV_CENTROID) && (type & GV_BOUNDARY)))) {
206 dissolve = TRUE;
207 if (field > 0 && opt.d_key->answer) {
208 int ncols, ret;
209 dbTable *Table;
210 dbColumn *Col;
211 dbString tabname;
212
213 dissolve_key = opt.d_key->answer;
214
215 Fi = Vect_get_field(&In, field);
216 if (!Fi) {
217 G_fatal_error(_("Database connection not defined for layer <%s>"),
218 opt.field->answer);
219 }
220
221 G_verbose_message(_("Searching for column <%s> in table <%s>..."),
222 dissolve_key, Fi->table);
223
224 driver = db_start_driver_open_database(Fi->driver, Fi->database);
225 if (driver == NULL)
226 G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
227 Fi->database, Fi->driver);
228
229 db_init_string(&tabname);
230 db_set_string(&tabname, Fi->table);
231
232 if (db_describe_table(driver, &tabname, &Table) != DB_OK) {
233 G_fatal_error(_("Unable to describe table <%s>"), Fi->table);
234 }
235
236 ret = 0;
237
238 ncols = db_get_table_number_of_columns(Table);
239 G_debug(3, "ncol = %d", ncols);
240
241 for (i = 0; i < ncols; i++) {
242 Col = db_get_table_column(Table, i);
243 if (G_strcasecmp(db_get_column_name(Col), dissolve_key) == 0) {
244 ret = 1;
245 break;
246 }
247 }
248 db_free_table(Table);
249 db_free_string(&tabname);
250 db_close_database_shutdown_driver(driver);
251
252 if (!ret)
253 G_fatal_error(_("Column <%s> does not exist for layer %d"),
254 dissolve_key, field);
255 }
256 }
257 else
258 dissolve = FALSE;
259
260 /* Read categoy list */
261 cat_count = 0;
262 if (opt.list->answer != NULL) {
263 /* no file of categories to read, process cat list */
264 /* first check for valid list */
265 for (i = 0; opt.list->answers[i]; i++) {
266 G_debug(2, "catlist item: %s", opt.list->answers[i]);
267 if (!scan_cats(opt.list->answers[i], &x, &y))
268 G_fatal_error(_("Category value in '%s' not valid"),
269 opt.list->answers[i]);
270 }
271
272 /* valid list, put into cat value array */
273 for (i = 0; opt.list->answers[i]; i++) {
274 scan_cats(opt.list->answers[i], &x, &y);
275 while (x <= y)
276 add_cat(x++);
277 }
278 }
279 else if (opt.file->answer != NULL) { /* got a file of category numbers */
280 if (strcmp(opt.file->answer, "-") == 0) {
281 in = stdin;
282 }
283 else {
284 G_verbose_message(_("Process file <%s> for category numbers..."),
285 opt.file->answer);
286
287 /* open input file */
288 if ((in = fopen(opt.file->answer, "r")) == NULL)
289 G_fatal_error(_("Unable to open specified file <%s>"),
290 opt.file->answer);
291 }
292
293 while (TRUE) {
294 if (!fgets(buffr, 39, in))
295 break;
296 /* eliminate some white space, we accept numbers and dashes only */
297 G_chop(buffr);
298 sscanf(buffr, "%[-0-9]", text);
299 if (strlen(text) == 0)
300 G_warning(_("Ignored text entry: %s"), buffr);
301
302 scan_cats(text, &x, &y);
303 /* two BUGS here: - fgets stops if white space appears
304 - if white space appears, number is not read correctly */
305 while (x <= y && x >= 0 && y >= 0)
306 add_cat(x++);
307 }
308
309 if (strcmp(opt.file->answer, "-") != 0)
310 fclose(in);
311 }
312 else if (opt.where->answer != NULL) {
313 Fi = Vect_get_field(&In, field);
314 if (!Fi) {
315 G_fatal_error(_("Database connection not defined for layer <%s>"),
316 opt.field->answer);
317 }
318
319 G_verbose_message(_("Loading categories from table <%s>..."), Fi->table);
320
321 driver = db_start_driver_open_database(Fi->driver, Fi->database);
322 if (driver == NULL)
323 G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
324 Fi->database, Fi->driver);
325
326 ncats = db_select_int(driver, Fi->table, Fi->key, opt.where->answer,
327 &cats);
328 if (ncats == -1)
329 G_fatal_error(_("Unable select records from table <%s>"),
330 Fi->table);
331 G_verbose_message(n_("%d category loaded",
332 "%d categories loaded",
333 ncats), ncats);
334
335 db_close_database(driver);
336 db_shutdown_driver(driver);
337
338 for (i = 0; i < ncats; i++)
339 add_cat(cats[i]);
340 if (ncats >= 0)
341 G_free(cats);
342 }
343 else if (opt.nrand->answer != NULL) { /* Generate random category list */
344 /* We operate on layer's CAT's and thus valid layer is required */
345 if (Vect_cidx_get_field_index(&In, field) < 0)
346 G_fatal_error(_("This map has no categories attached. "
347 "Use v.category to attach categories to "
348 "this vector map."));
349
350 /* Don't do any processing, if user input is wrong */
351 nrandom = atoi(opt.nrand->answer);
352 if (nrandom < 1)
353 G_fatal_error(_("Please specify random number larger than 0"));
354
355 nfeatures = Vect_cidx_get_type_count(&In, field, type);
356 if (nrandom >= nfeatures)
357 G_fatal_error(_("Random category count must be smaller than feature count. "
358 "There are only %d features of type(s): %s"),
359 nfeatures, opt.type->answer);
360
361 /* Let's create an array of uniq CAT values
362 According to Vlib/build.c, cidx should be already sorted by dig_cidx_sort() */
363 ci = &(In.plus.cidx[Vect_cidx_get_field_index(&In, field)]);
364 ucat_count = 0;
365 for (c = 0; c < ci->n_cats; c++) {
366 /* Bitwise AND compares ci feature type with user's requested types */
367 if (ci->cat[c][1] & type) {
368 /* Don't do anything if such value already exists */
369 if (ucat_count > 0 &&
370 ucat_array[ucat_count - 1] == ci->cat[c][0])
371 continue;
372 ucat_array =
373 G_realloc(ucat_array, (ucat_count + 1) * sizeof(int));
374 ucat_array[ucat_count] = ci->cat[c][0];
375 ucat_count++;
376 }
377 }
378
379 if (nrandom >= ucat_count)
380 G_fatal_error(_("Random category count is larger or equal to "
381 "uniq <%s> feature category count %d"),
382 opt.type->answer, ucat_count);
383
384 /* Initialise random number generator */
385 /* FIXME - allow seed to be specified for repeatability */
386 G_math_srand_auto();
387
388 /* Fill cat_array with list of valid random numbers */
389 while (cat_count < nrandom) {
390 /* Random number in range from 0 to largest CAT value */
391 prnd = (int) floor(G_math_rand() *
392 (ucat_array[ucat_count - 1] + 1));
393 qsort(cat_array, cat_count, sizeof(int), cmp);
394 /* Check if generated number isn't already in
395 final list and is in list of existing CATs */
396 if (bsearch(&prnd, cat_array, cat_count, sizeof(int), cmp) == NULL
397 && bsearch(&prnd, ucat_array, ucat_count, sizeof(int),
398 cmp) != NULL)
399 add_cat(prnd);
400 }
401 G_free(ucat_array);
402 qsort(cat_array, cat_count, sizeof(int), cmp);
403 }
404
405 if (Vect_open_new(&Out, output, Vect_is_3d(&In)) < 0)
406 G_fatal_error(_("Unable to create vector map <%s>"), output);
407
408 Vect_hist_copy(&In, &Out);
409 Vect_hist_command(&Out);
410
411 /* Read and write header info */
412 Vect_copy_head_data(&In, &Out);
413
414 G_message(_("Extracting features..."));
415
416 native = Vect_maptype(&Out) == GV_FORMAT_NATIVE;
417 if (!flag.t->answer && !native) {
418 /* Copy attributes for OGR output */
419 Vect_copy_map_dblinks(&In, &Out, TRUE);
420 }
421
422 extract_line(cat_count, cat_array, &In, &Out, new_cat, type,
423 dissolve, dissolve_key, field, type_only, flag.r->answer ? 1 : 0);
424
425 Vect_build(&Out);
426
427 /* Copy tables */
428 if (!flag.t->answer && native) {
429 copy_tabs(&In, field, new_cat, &Out);
430 }
431
432 Vect_close(&In);
433
434 /* remove duplicate centroids */
435 if (dissolve) {
436 int line, nlines, ltype, area;
437
438 G_message(_("Removing duplicate centroids..."));
439 nlines = Vect_get_num_lines(&Out);
440 for (line = 1; line <= nlines; line++) {
441 if (!Vect_line_alive(&Out, line))
442 continue; /* should not happen */
443
444 ltype = Vect_read_line(&Out, NULL, NULL, line);
445 if (!(ltype & GV_CENTROID)) {
446 continue;
447 }
448 area = Vect_get_centroid_area(&Out, line);
449
450 if (area < 0) {
451 Vect_delete_line(&Out, line);
452 }
453 }
454 Vect_build_partial(&Out, GV_BUILD_NONE);
455 Vect_build(&Out);
456 }
457
458 Vect_close(&Out);
459
460 exit(EXIT_SUCCESS);
461 }
462
add_cat(int x)463 void add_cat(int x)
464 {
465 G_debug(2, "add_cat %d", x);
466
467 if (cat_count >= cat_size) {
468 cat_size = (cat_size < 1000) ? 1000 : cat_size * 2;
469 cat_array = G_realloc(cat_array, cat_size * sizeof(int));
470 }
471
472 cat_array[cat_count++] = x;
473 }
474
scan_cats(char * s,int * x,int * y)475 int scan_cats(char *s, int *x, int *y)
476 {
477 char dummy[2];
478
479 if (strlen(s) == 0) { /* error */
480 *y = *x = -1;
481 return 1;
482 }
483
484 *dummy = 0;
485 if (sscanf(s, "%d-%d%1s", x, y, dummy) == 2)
486 return (*dummy == 0 && *x <= *y);
487
488 *dummy = 0;
489 if (sscanf(s, "%d%1s", x, dummy) == 1 && *dummy == 0) {
490 *y = *x;
491 return 1;
492 }
493 return 0;
494 }
495