#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <grass/gis.h>
#include <grass/dbmi.h>
#include <grass/vector.h>
#include <grass/glocale.h>
#include "local_proto.h"
10
/* Determine if the string is integer, e.g. 123, +123, -123.
 *
 * The whole string must be consumed by strtol() in base 10 and the
 * value must fit into a long; a digit string that overflows long is
 * rejected so that the caller can fall back to another column type.
 * Leading white space is accepted because strtol() skips it.
 *
 * str: NUL-terminated string to test
 *
 * return 1 if integer, 0 otherwise */
static int is_int(char *str)
{
    char *tail;

    /* strtol() only reports overflow through errno, so clear it first */
    errno = 0;
    (void)strtol(str, &tail, 10);

    if (tail == str || *tail != '\0') {
        /* doesn't look like a number,
           or has extra characters after what looks to be a number */
        return 0;
    }

    if (errno == ERANGE) {
        /* all digits, but the value does not fit into a long */
        return 0;
    }

    return 1;
}
25
26
/* Determine if the string is double, e.g. 123.456, +123.456, -123.456,
 * 1.23456e2.
 *
 * The string qualifies when strtod() converts at least one character
 * and stops exactly at the terminating NUL.
 *
 * str: NUL-terminated string to test
 *
 * return 1 if double, 0 otherwise */
static int is_double(char *str)
{
    char *end = NULL;

    /* only the stop position matters, the converted value is unused */
    (void)strtod(str, &end);

    /* nothing converted: doesn't look like a number */
    if (end == str)
        return 0;

    /* extra characters after what looks to be a number */
    if (*end != '\0')
        return 0;

    return 1;
}
41
42
43
44 /* Analyse points ascii file. Determine number of columns and column types.
45 * ascii_tmp: write copy of tempfile to ascii_tmp:
46 * rowlength: maximum row length
47 * ncolumns: number of columns
48 * minncolumns: minimum number of columns
49 * nrows: number of rows
50 * column_type: column types
51 * column_sample: values which was used to decide the type or NULLs
52 * column_length: column lengths (string only)
53 *
54 * If the who whole column is empty, column_sample will contain NULL
55 * for that given column.
56 */
57
points_analyse(FILE * ascii_in,FILE * ascii,char * fs,char * td,int * rowlength,int * ncolumns,int * minncolumns,int * nrows,int ** column_type,char *** column_sample,int ** column_length,int skip_lines,int xcol,int ycol,int zcol,int catcol,int region_flag,int ignore_flag)58 int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
59 int *rowlength, int *ncolumns, int *minncolumns,
60 int *nrows, int **column_type, char ***column_sample,
61 int **column_length,
62 int skip_lines, int xcol, int ycol, int zcol, int catcol,
63 int region_flag, int ignore_flag)
64 {
65 int i;
66 int buflen; /* buffer length */
67 char *buf; /* buffer */
68 int row = 1; /* line number, first is 1 */
69 int ncols = 0; /* number of columns */
70 int minncols = -1;
71 int *coltype = NULL; /* column types */
72 char **colsample = NULL; /* column samples */
73 int *collen = NULL; /* column lengths */
74 char **tokens;
75 int ntokens; /* number of tokens */
76 int len, rowlen = 0; /* maximum row length */
77 struct Cell_head window;
78 double northing = .0;
79 double easting = .0;
80 char *xtoken, *ytoken, *sav_buf;
81 int skip = FALSE, skipped = 0;
82
83 buflen = 4000;
84 buf = (char *)G_malloc(buflen);
85 xtoken = (char *)G_malloc(256);
86 ytoken = (char *)G_malloc(256);
87
88 G_message(_("Scanning input for column types..."));
89 /* fetch projection for LatLong test */
90 G_get_window(&window);
91
92 /* points_to_bin() would be faster if we would write out
93 * clean data to ascii
94 * points_to_bin() would then not need G_chop() and
95 * for latlon not G_scan_[easting|northing]() */
96
97 while (1) {
98 len = 0; /* not really needed, but what the heck */
99 skip = FALSE; /* reset out-of-region check */
100 sav_buf = NULL;
101
102 if (G_getl2(buf, buflen - 1, ascii_in) == 0)
103 break; /* EOF */
104
105 if (row <= skip_lines) {
106 G_debug(3, "skipping header row %d : %d chars", row,
107 (int)strlen(buf));
108 /* this fn is read-only, write to hist with points_to_bin() */
109 fprintf(ascii, "%s\n", buf);
110 len = strlen(buf) + 1;
111 if (len > rowlen)
112 rowlen = len;
113
114 row++;
115 continue;
116 }
117
118 if ((buf[0] == '#') || (buf[0] == '\0')) {
119 G_debug(3, "skipping comment row %d : %d chars", row,
120 (int)strlen(buf));
121 continue;
122 }
123
124 /* no G_chop() as first/last column may be empty fs=tab value */
125 G_debug(3, "row %d : %d chars", row, (int)strlen(buf));
126
127 tokens = G_tokenize2(buf, fs, td);
128 ntokens = G_number_of_tokens(tokens);
129 if (ntokens == 0) {
130 continue;
131 }
132
133 if (ncols > 0 && ntokens != ncols) {
134 /* these rows can not be imported into the attribute table */
135 if (ignore_flag) {
136 G_warning(_("Row %d: '%s' can not be imported into the attribute table"),
137 row, buf);
138 }
139 else {
140 G_warning(_("Expected %d columns, found %d columns"), ncols, ntokens);
141 G_fatal_error(_("Broken row %d: '%s'"), row, buf);
142 }
143 }
144 if (xcol >= ntokens || ycol >= ntokens || zcol >= ntokens ||
145 catcol >= ntokens) {
146 if (ignore_flag) {
147 G_debug(3, "Skipping broken row %d: '%s'", row, buf);
148 continue;
149 }
150 else {
151 G_warning(_("ntokens: %d, xcol: %d, ycol: %d, zcol: %d"), xcol, ycol, zcol);
152 G_fatal_error(_("Broken row %d: '%s'"), row, buf);
153 }
154 }
155
156 len = strlen(buf) + 1;
157 if (len > rowlen)
158 rowlen = len;
159
160 if (ntokens > ncols) {
161 coltype = (int *)G_realloc(coltype, ntokens * sizeof(int));
162 colsample = (char **)G_realloc(colsample, ntokens * sizeof(char *));
163 collen = (int *)G_realloc(collen, ntokens * sizeof(int));
164 for (i = ncols; i < ntokens; i++) {
165 coltype[i] = DB_C_TYPE_INT; /* default type */
166 /* We store a value later if column is not empty. */
167 colsample[i] = NULL;
168 collen[i] = 0;
169 }
170 ncols = ntokens;
171 }
172
173 if (minncols == -1 || minncols > ntokens)
174 minncols = ntokens;
175
176 /* Determine column types */
177 for (i = 0; i < ntokens; i++) {
178 G_chop(tokens[i]);
179 if ((G_projection() == PROJECTION_LL)) {
180 if (i == xcol || i == ycol) {
181 if (i == 0) { /* Save position of original internal token buffer */
182 /* Prevent memory leaks */
183 sav_buf = tokens[0];
184 }
185 /* check if coordinates are DMS or decimal or not latlong at all */
186 if (i == xcol) {
187 if (G_scan_easting(tokens[i], &easting, window.proj)) {
188 G_debug(4, "is_latlong east: %g", easting);
189 sprintf(xtoken, "%.15g", easting);
190 /* replace current DMS token by decimal degree */
191 tokens[i] = xtoken;
192 if (region_flag) {
193 if ((window.east < easting) ||
194 (window.west > easting))
195 skip = TRUE;
196 }
197 }
198 else {
199 fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
200 G_fatal_error(_("Unparsable longitude value in column %d: %s"),
201 i + 1, tokens[i]);
202 }
203 }
204
205 if (i == ycol) {
206 if (G_scan_northing(tokens[i], &northing, window.proj)) {
207 G_debug(4, "is_latlong north: %g", northing);
208 sprintf(ytoken, "%.15g", northing);
209 /* replace current DMS token by decimal degree */
210 tokens[i] = ytoken;
211 if (region_flag) {
212 if ((window.north < northing) ||
213 (window.south > northing))
214 skip = TRUE;
215 }
216 }
217 else {
218 fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
219 G_fatal_error(_("Unparsable latitude value in column %d: %s"),
220 i + 1, tokens[i]);
221 }
222 }
223 } /* if (x or y) */
224 } /* PROJECTION_LL */
225 else {
226 if (strlen(tokens[i]) == 0) {
227 if (i == xcol) {
228 G_fatal_error(_("Unparsable longitude value in column %d: %s"),
229 i + 1, tokens[i]);
230 }
231 if (i == ycol) {
232 G_fatal_error(_("Unparsable latitude value in column %d: %s"),
233 i + 1, tokens[i]);
234 }
235 }
236 if (region_flag) {
237 /* consider z range if -z flag is used? */
238 /* change to if(>= east,north){skip=1;} to allow correct tiling */
239 /* don't "continue;" so multiple passes will have the
240 same column types and length for patching */
241 if (i == xcol) {
242 easting = atof(tokens[i]);
243 if ((window.east < easting) ||
244 (window.west > easting))
245 skip = TRUE;
246 }
247 if (i == ycol) {
248 northing = atof(tokens[i]);
249 if ((window.north < northing) ||
250 (window.south > northing))
251 skip = TRUE;
252 }
253 }
254 }
255
256 len = strlen(tokens[i]);
257 /* do not guess column type for missing values */
258 /* continue here ensures that we preserve NULLs in
259 * colsample for (completely) empty columns (which, however,
260 * should probably default to string rather than int). */
261 if (len == 0)
262 continue;
263
264 G_debug(4, "row %d col %d: '%s' is_int = %d is_double = %d",
265 row + 1, i + 1, tokens[i], is_int(tokens[i]),
266 is_double(tokens[i]));
267
268 if (is_int(tokens[i])) {
269 /* We store the first encountered value for integers.
270 * Rest is for consistency. */
271 if (!colsample[i] || coltype[i] != DB_C_TYPE_INT) {
272 G_free(colsample[i]);
273 colsample[i] = G_store(tokens[i]);
274 }
275 continue; /* integer */
276 }
277 if (is_double(tokens[i])) { /* double */
278 if (coltype[i] == DB_C_TYPE_INT) {
279 coltype[i] = DB_C_TYPE_DOUBLE;
280 G_free(colsample[i]);
281 colsample[i] = G_store(tokens[i]);
282 }
283 continue;
284 }
285 /* string */
286 if (coltype[i] != DB_C_TYPE_STRING) {
287 /* Only set type if not already set to store the field
288 * only once and to show the first encountered item. */
289 coltype[i] = DB_C_TYPE_STRING;
290 G_free(colsample[i]);
291 colsample[i] = G_store(tokens[i]);
292 }
293 if (len > collen[i])
294 collen[i] = len;
295 }
296
297 /* write dataline to tmp file */
298 if (!skip)
299 fprintf(ascii, "%s\n", buf);
300 else
301 skipped++;
302
303 if (sav_buf != NULL) {
304 /* Restore original token buffer so free_tokens works */
305 /* Only do this if tokens[0] was re-assigned */
306 tokens[0] = sav_buf;
307 sav_buf = NULL;
308 }
309
310 G_free_tokens(tokens);
311 row++;
312 }
313
314 *rowlength = rowlen;
315 *ncolumns = ncols;
316 *minncolumns = minncols;
317 *column_type = coltype;
318 *column_sample = colsample;
319 *column_length = collen;
320 *nrows = row - 1; /* including skipped lines */
321
322 G_free(buf);
323 G_free(xtoken);
324 G_free(ytoken);
325
326 if (region_flag)
327 G_message(n_("Skipping %d of %d row falling outside of current region",
328 "Skipping %d of %d rows falling outside of current region",
329 row - 1),
330 skipped, row - 1);
331
332 return 0;
333 }
334
335
336 /* Import points from ascii file.
337 *
338 * fs: field separator
339 * xcol, ycol, zcol, catcol: x,y,z,cat column in input file, first column is 1,
340 * zcol and catcol may be 0 (do not use)
341 * rowlen: maximum row length
342 * Note: column types (both in header or coldef) must be supported by driver
343 */
points_to_bin(FILE * ascii,int rowlen,struct Map_info * Map,dbDriver * driver,char * table,char * fs,char * td,int nrows,int * coltype,int xcol,int ycol,int zcol,int catcol,int skip_lines)344 int points_to_bin(FILE * ascii, int rowlen, struct Map_info *Map,
345 dbDriver * driver, char *table, char *fs, char *td,
346 int nrows, int *coltype, int xcol, int ycol, int zcol,
347 int catcol, int skip_lines)
348 {
349 char *buf, buf2[4000];
350 int cat = 0;
351 int row = 0;
352 struct line_pnts *Points;
353 struct line_cats *Cats;
354 dbString sql, val;
355 struct Cell_head window;
356
357 G_message(_("Importing points..."));
358 /* fetch projection for LatLong test */
359 G_get_window(&window);
360
361 rewind(ascii);
362 Points = Vect_new_line_struct();
363 Cats = Vect_new_cats_struct();
364
365 /* actually last 2 characters won't be read */
366 buf = (char *)G_malloc(rowlen + 2);
367 db_init_string(&sql);
368 db_init_string(&val);
369
370 if (skip_lines > 0) {
371 sprintf(buf2, "HEADER: (%d lines)\n", skip_lines);
372 Vect_hist_write(Map, buf2);
373 }
374
375 /* rowlen + 2 to read till the end of line on both UNIX and Windows */
376 while (G_getl2(buf, rowlen + 2, ascii) != 0) {
377 int i, len;
378 double x, y, z;
379 char **tokens;
380 int ntokens; /* number of tokens */
381
382 G_percent(row, nrows, 2);
383 row++;
384
385 if (row <= skip_lines) {
386 G_debug(4, "writing skip line %d to hist : %d chars", row,
387 (int)strlen(buf));
388 Vect_hist_write(Map, buf);
389 Vect_hist_write(Map, "\n");
390 continue;
391 }
392
393 len = strlen(buf);
394 if (len == 0)
395 continue; /* should not happen */
396
397 G_debug(4, "row: %s", buf);
398
399 tokens = G_tokenize2(buf, fs, td);
400 ntokens = G_number_of_tokens(tokens);
401
402 G_chop(tokens[xcol]);
403 G_chop(tokens[ycol]);
404
405 if ((G_projection() == PROJECTION_LL)) {
406 G_scan_easting(tokens[xcol], &x, window.proj);
407 G_scan_northing(tokens[ycol], &y, window.proj);
408 }
409 else {
410 x = atof(tokens[xcol]);
411 y = atof(tokens[ycol]);
412 }
413 G_debug(4, "x: %f, y: %f", x, y);
414
415 if (zcol >= 0) {
416 G_chop(tokens[zcol]);
417 z = atof(tokens[zcol]);
418 }
419 else
420 z = 0.0;
421
422 if (catcol >= 0) {
423 G_chop(tokens[catcol]);
424 cat = atof(tokens[catcol]);
425 }
426 else
427 cat++;
428
429 Vect_reset_line(Points);
430 Vect_reset_cats(Cats);
431
432 Vect_append_point(Points, x, y, z);
433 Vect_cat_set(Cats, 1, cat);
434
435 Vect_write_line(Map, GV_POINT, Points, Cats);
436
437 /* Attributes */
438 if (driver) {
439 sprintf(buf2, "insert into %s values ( ", table);
440 db_set_string(&sql, buf2);
441
442 if (catcol < 0) {
443 sprintf(buf2, "%d, ", cat);
444 db_append_string(&sql, buf2);
445 }
446
447 for (i = 0; i < ntokens; i++) {
448 G_chop(tokens[i]);
449 if (i > 0)
450 db_append_string(&sql, ", ");
451
452 if (strlen(tokens[i]) > 0) {
453 if (coltype[i] == DB_C_TYPE_INT ||
454 coltype[i] == DB_C_TYPE_DOUBLE) {
455 if (G_projection() == PROJECTION_LL &&
456 (i == xcol || i == ycol)) {
457 if (i == xcol)
458 sprintf(buf2, "%.15g", x);
459 else
460 sprintf(buf2, "%.15g", y);
461 }
462 else
463 sprintf(buf2, "%s", tokens[i]);
464 }
465 else {
466 db_set_string(&val, tokens[i]);
467 /* TODO: strip leading and trailing "quotes" from input string */
468 db_double_quote_string(&val);
469 sprintf(buf2, "'%s'", db_get_string(&val));
470 }
471 }
472 else {
473 sprintf(buf2, "null");
474 }
475 db_append_string(&sql, buf2);
476 }
477 db_append_string(&sql, ")");
478 G_debug(3, "%s", db_get_string(&sql));
479
480 if (db_execute_immediate(driver, &sql) != DB_OK) {
481 G_fatal_error(_("Unable to insert new record: %s"),
482 db_get_string(&sql));
483 }
484 }
485
486 G_free_tokens(tokens);
487 }
488 G_percent(nrows, nrows, 2);
489
490 return 0;
491 }
492