#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <grass/gis.h>
#include <grass/dbmi.h>
#include <grass/vector.h>
#include <grass/glocale.h>
#include "local_proto.h"
10 
/* Determine if the string is integer, e.g. 123, +123, -123.
 *
 * The whole string has to be consumed by strtol() (base 10) and the
 * value has to be representable as a long: a digit string for which
 * strtol() reports ERANGE is rejected.
 *
 * str: NUL-terminated string to test
 *
 * return 1 if integer, 0 otherwise */
static int is_int(char *str)
{
    char *tail;

    /* strtol() only sets errno on failure, so clear it first */
    errno = 0;
    (void)strtol(str, &tail, 10);

    if (tail == str || *tail != '\0') {
        /* doesn't look like a number,
           or has extra characters after what looks to be a number */
        return 0;
    }

    if (errno == ERANGE) {
        /* looks like an integer, but is too large to be stored as one */
        return 0;
    }

    return 1;
}
25 
26 
/* Check whether the string can be read as a floating point number,
 * e.g. 123.456, +123.456, -123.456, 1.23456e2.
 *
 * The test passes only if strtod() consumes the complete string.
 *
 * str: NUL-terminated string to test
 *
 * return 1 if double, 0 otherwise */
static int is_double(char *str)
{
    char *end = NULL;

    /* only the end position matters, the converted value is not used */
    (void)strtod(str, &end);

    /* nothing could be converted: not a number */
    if (end == str)
        return 0;

    /* a number followed by extra characters is not accepted either */
    return (*end == '\0') ? 1 : 0;
}
41 
42 
43 
44 /* Analyse points ascii file. Determine number of columns and column types.
45  * ascii_tmp: write copy of tempfile to ascii_tmp:
46  * rowlength: maximum row length
47  * ncolumns: number of columns
48  * minncolumns: minimum number of columns
49  * nrows: number of rows
50  * column_type: column types
51  * column_sample: values which was used to decide the type or NULLs
52  * column_length: column lengths (string only)
53  *
54  * If the who whole column is empty, column_sample will contain NULL
55  * for that given column.
56  */
57 
points_analyse(FILE * ascii_in,FILE * ascii,char * fs,char * td,int * rowlength,int * ncolumns,int * minncolumns,int * nrows,int ** column_type,char *** column_sample,int ** column_length,int skip_lines,int xcol,int ycol,int zcol,int catcol,int region_flag,int ignore_flag)58 int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
59 		   int *rowlength, int *ncolumns, int *minncolumns,
60 		   int *nrows, int **column_type, char ***column_sample,
61 		   int **column_length,
62 		   int skip_lines, int xcol, int ycol, int zcol, int catcol,
63 		   int region_flag, int ignore_flag)
64 {
65     int i;
66     int buflen;			/* buffer length */
67     char *buf;			/* buffer */
68     int row = 1;		/* line number, first is 1 */
69     int ncols = 0;		/* number of columns */
70     int minncols = -1;
71     int *coltype = NULL;	/* column types */
72     char **colsample = NULL;	/* column samples */
73     int *collen = NULL;		/* column lengths */
74     char **tokens;
75     int ntokens;		/* number of tokens */
76     int len, rowlen = 0;	/* maximum row length */
77     struct Cell_head window;
78     double northing = .0;
79     double easting = .0;
80     char *xtoken, *ytoken, *sav_buf;
81     int skip = FALSE, skipped = 0;
82 
83     buflen = 4000;
84     buf = (char *)G_malloc(buflen);
85     xtoken = (char *)G_malloc(256);
86     ytoken = (char *)G_malloc(256);
87 
88     G_message(_("Scanning input for column types..."));
89     /* fetch projection for LatLong test */
90     G_get_window(&window);
91 
92     /* points_to_bin() would be faster if we would write out
93      * clean data to ascii
94      * points_to_bin() would then not need G_chop() and
95      * for latlon not G_scan_[easting|northing]() */
96 
97     while (1) {
98 	len = 0;		/* not really needed, but what the heck */
99 	skip = FALSE;		/* reset out-of-region check */
100 	sav_buf = NULL;
101 
102 	if (G_getl2(buf, buflen - 1, ascii_in) == 0)
103 	    break;		/* EOF */
104 
105 	if (row <= skip_lines) {
106 	    G_debug(3, "skipping header row %d : %d chars", row,
107 		    (int)strlen(buf));
108 	    /* this fn is read-only, write to hist with points_to_bin() */
109 	    fprintf(ascii, "%s\n", buf);
110 	    len = strlen(buf) + 1;
111 	    if (len > rowlen)
112 		rowlen = len;
113 
114 	    row++;
115 	    continue;
116 	}
117 
118 	if ((buf[0] == '#') || (buf[0] == '\0')) {
119 	    G_debug(3, "skipping comment row %d : %d chars", row,
120 		    (int)strlen(buf));
121 	    continue;
122 	}
123 
124 	/* no G_chop() as first/last column may be empty fs=tab value */
125 	G_debug(3, "row %d : %d chars", row, (int)strlen(buf));
126 
127 	tokens = G_tokenize2(buf, fs, td);
128 	ntokens = G_number_of_tokens(tokens);
129 	if (ntokens == 0) {
130 	    continue;
131 	}
132 
133 	if (ncols > 0 && ntokens != ncols) {
134 	    /* these rows can not be imported into the attribute table */
135 	    if (ignore_flag) {
136 		G_warning(_("Row %d: '%s' can not be imported into the attribute table"),
137 		          row, buf);
138 	    }
139 	    else {
140 		G_warning(_("Expected %d columns, found %d columns"), ncols, ntokens);
141 		G_fatal_error(_("Broken row %d: '%s'"), row, buf);
142 	    }
143 	}
144 	if (xcol >= ntokens || ycol >= ntokens || zcol >= ntokens ||
145 	    catcol >= ntokens) {
146 	    if (ignore_flag) {
147 		G_debug(3, "Skipping broken row %d: '%s'", row, buf);
148 		continue;
149 	    }
150 	    else {
151 		G_warning(_("ntokens: %d, xcol: %d, ycol: %d, zcol: %d"), xcol, ycol, zcol);
152 		G_fatal_error(_("Broken row %d: '%s'"), row, buf);
153 	    }
154 	}
155 
156 	len = strlen(buf) + 1;
157 	if (len > rowlen)
158 	    rowlen = len;
159 
160 	if (ntokens > ncols) {
161 	    coltype = (int *)G_realloc(coltype, ntokens * sizeof(int));
162 	    colsample = (char **)G_realloc(colsample, ntokens * sizeof(char *));
163 	    collen = (int *)G_realloc(collen, ntokens * sizeof(int));
164 	    for (i = ncols; i < ntokens; i++) {
165 		coltype[i] = DB_C_TYPE_INT;	/* default type */
166 		/* We store a value later if column is not empty. */
167 		colsample[i] = NULL;
168 		collen[i] = 0;
169 	    }
170 	    ncols = ntokens;
171 	}
172 
173 	if (minncols == -1 || minncols > ntokens)
174 	    minncols = ntokens;
175 
176 	/* Determine column types */
177 	for (i = 0; i < ntokens; i++) {
178 	    G_chop(tokens[i]);
179 	    if ((G_projection() == PROJECTION_LL)) {
180 		if (i == xcol || i == ycol) {
181 		    if (i == 0) {	/* Save position of original internal token buffer */
182 			/* Prevent memory leaks */
183 			sav_buf = tokens[0];
184 		    }
185 		    /* check if coordinates are DMS or decimal or not latlong at all */
186 		    if (i == xcol) {
187 			if (G_scan_easting(tokens[i], &easting, window.proj)) {
188 			    G_debug(4, "is_latlong east: %g", easting);
189 			    sprintf(xtoken, "%.15g", easting);
190 			    /* replace current DMS token by decimal degree */
191 			    tokens[i] = xtoken;
192 			    if (region_flag) {
193 				if ((window.east < easting) ||
194 				    (window.west > easting))
195 				    skip = TRUE;
196 			    }
197 			}
198 			else {
199                             fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
200 			    G_fatal_error(_("Unparsable longitude value in column %d: %s"),
201 					  i + 1, tokens[i]);
202 			}
203 		    }
204 
205 		    if (i == ycol) {
206 			if (G_scan_northing(tokens[i], &northing, window.proj)) {
207 			    G_debug(4, "is_latlong north: %g", northing);
208 			    sprintf(ytoken, "%.15g", northing);
209 			    /* replace current DMS token by decimal degree */
210 			    tokens[i] = ytoken;
211 			    if (region_flag) {
212 				if ((window.north < northing) ||
213 				    (window.south > northing))
214 				    skip = TRUE;
215 			    }
216 			}
217 			else {
218 			    fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
219 			    G_fatal_error(_("Unparsable latitude value in column %d: %s"),
220 					  i + 1, tokens[i]);
221 			}
222 		    }
223 		}		/* if (x or y) */
224 	    }			/* PROJECTION_LL */
225 	    else {
226 		if (strlen(tokens[i]) == 0) {
227 		    if (i == xcol) {
228 			G_fatal_error(_("Unparsable longitude value in column %d: %s"),
229 				      i + 1, tokens[i]);
230 		    }
231 		    if (i == ycol) {
232 			G_fatal_error(_("Unparsable latitude value in column %d: %s"),
233 				      i + 1, tokens[i]);
234 		    }
235 		}
236 		if (region_flag) {
237 		    /* consider z range if -z flag is used? */
238 		    /* change to if(>= east,north){skip=1;} to allow correct tiling */
239 		    /* don't "continue;" so multiple passes will have the
240 		       same column types and length for patching */
241 		    if (i == xcol) {
242 			easting = atof(tokens[i]);
243 			if ((window.east < easting) ||
244 			    (window.west > easting))
245 			    skip = TRUE;
246 		    }
247 		    if (i == ycol) {
248 			northing = atof(tokens[i]);
249 			if ((window.north < northing) ||
250 			    (window.south > northing))
251 			    skip = TRUE;
252 		    }
253 		}
254 	    }
255 
256 	    len = strlen(tokens[i]);
257 	    /* do not guess column type for missing values */
258 	    /* continue here ensures that we preserve NULLs in
259 	     * colsample for (completely) empty columns (which, however,
260 	     * should probably default to string rather than int). */
261 	    if (len == 0)
262 		continue;
263 
264 	    G_debug(4, "row %d col %d: '%s' is_int = %d is_double = %d",
265 		    row + 1, i + 1, tokens[i], is_int(tokens[i]),
266 		    is_double(tokens[i]));
267 
268 	    if (is_int(tokens[i])) {
269 		/* We store the first encountered value for integers.
270 		 * Rest is for consistency. */
271 		if (!colsample[i] || coltype[i] != DB_C_TYPE_INT) {
272 		    G_free(colsample[i]);
273 		    colsample[i] = G_store(tokens[i]);
274 		}
275 		continue;	/* integer */
276 	    }
277 	    if (is_double(tokens[i])) {	/* double */
278 		if (coltype[i] == DB_C_TYPE_INT) {
279 		    coltype[i] = DB_C_TYPE_DOUBLE;
280 		    G_free(colsample[i]);
281 		    colsample[i] = G_store(tokens[i]);
282 		}
283 		continue;
284 	    }
285 	    /* string */
286 	    if (coltype[i] != DB_C_TYPE_STRING) {
287 		/* Only set type if not already set to store the field
288 		 * only once and to show the first encountered item. */
289 		coltype[i] = DB_C_TYPE_STRING;
290 		G_free(colsample[i]);
291 		colsample[i] = G_store(tokens[i]);
292 	    }
293 	    if (len > collen[i])
294 		collen[i] = len;
295 	}
296 
297 	/* write dataline to tmp file */
298 	if (!skip)
299 	    fprintf(ascii, "%s\n", buf);
300 	else
301 	    skipped++;
302 
303 	if (sav_buf != NULL) {
304 	    /* Restore original token buffer so free_tokens works */
305 	    /* Only do this if tokens[0] was re-assigned */
306 	    tokens[0] = sav_buf;
307 	    sav_buf = NULL;
308 	}
309 
310 	G_free_tokens(tokens);
311 	row++;
312     }
313 
314     *rowlength = rowlen;
315     *ncolumns = ncols;
316     *minncolumns = minncols;
317     *column_type = coltype;
318     *column_sample = colsample;
319     *column_length = collen;
320     *nrows = row - 1;		/* including skipped lines */
321 
322     G_free(buf);
323     G_free(xtoken);
324     G_free(ytoken);
325 
326     if (region_flag)
327 	G_message(n_("Skipping %d of %d row falling outside of current region",
328                      "Skipping %d of %d rows falling outside of current region",
329                      row - 1),
330 		  skipped, row - 1);
331 
332     return 0;
333 }
334 
335 
336 /* Import points from ascii file.
337  *
338  * fs: field separator
339  * xcol, ycol, zcol, catcol: x,y,z,cat column in input file, first column is 1,
340  *                            zcol and catcol may be 0 (do not use)
341  * rowlen: maximum row length
342  * Note: column types (both in header or coldef) must be supported by driver
343  */
points_to_bin(FILE * ascii,int rowlen,struct Map_info * Map,dbDriver * driver,char * table,char * fs,char * td,int nrows,int * coltype,int xcol,int ycol,int zcol,int catcol,int skip_lines)344 int points_to_bin(FILE * ascii, int rowlen, struct Map_info *Map,
345 		  dbDriver * driver, char *table, char *fs, char *td,
346 		  int nrows, int *coltype, int xcol, int ycol, int zcol,
347 		  int catcol, int skip_lines)
348 {
349     char *buf, buf2[4000];
350     int cat = 0;
351     int row = 0;
352     struct line_pnts *Points;
353     struct line_cats *Cats;
354     dbString sql, val;
355     struct Cell_head window;
356 
357     G_message(_("Importing points..."));
358     /* fetch projection for LatLong test */
359     G_get_window(&window);
360 
361     rewind(ascii);
362     Points = Vect_new_line_struct();
363     Cats = Vect_new_cats_struct();
364 
365     /* actually last 2 characters won't be read */
366     buf = (char *)G_malloc(rowlen + 2);
367     db_init_string(&sql);
368     db_init_string(&val);
369 
370     if (skip_lines > 0) {
371 	sprintf(buf2, "HEADER: (%d lines)\n", skip_lines);
372 	Vect_hist_write(Map, buf2);
373     }
374 
375     /* rowlen + 2 to read till the end of line on both UNIX and Windows */
376     while (G_getl2(buf, rowlen + 2, ascii) != 0) {
377 	int i, len;
378 	double x, y, z;
379 	char **tokens;
380 	int ntokens;		/* number of tokens */
381 
382 	G_percent(row, nrows, 2);
383 	row++;
384 
385 	if (row <= skip_lines) {
386 	    G_debug(4, "writing skip line %d to hist : %d chars", row,
387 		    (int)strlen(buf));
388 	    Vect_hist_write(Map, buf);
389 	    Vect_hist_write(Map, "\n");
390 	    continue;
391 	}
392 
393 	len = strlen(buf);
394 	if (len == 0)
395 	    continue;		/* should not happen */
396 
397 	G_debug(4, "row: %s", buf);
398 
399 	tokens = G_tokenize2(buf, fs, td);
400 	ntokens = G_number_of_tokens(tokens);
401 
402 	G_chop(tokens[xcol]);
403 	G_chop(tokens[ycol]);
404 
405 	if ((G_projection() == PROJECTION_LL)) {
406 	    G_scan_easting(tokens[xcol], &x, window.proj);
407 	    G_scan_northing(tokens[ycol], &y, window.proj);
408 	}
409 	else {
410 	    x = atof(tokens[xcol]);
411 	    y = atof(tokens[ycol]);
412 	}
413 	G_debug(4, "x: %f, y: %f", x, y);
414 
415 	if (zcol >= 0) {
416 	    G_chop(tokens[zcol]);
417 	    z = atof(tokens[zcol]);
418 	}
419 	else
420 	    z = 0.0;
421 
422 	if (catcol >= 0) {
423 	    G_chop(tokens[catcol]);
424 	    cat = atof(tokens[catcol]);
425 	}
426 	else
427 	    cat++;
428 
429 	Vect_reset_line(Points);
430 	Vect_reset_cats(Cats);
431 
432 	Vect_append_point(Points, x, y, z);
433 	Vect_cat_set(Cats, 1, cat);
434 
435 	Vect_write_line(Map, GV_POINT, Points, Cats);
436 
437 	/* Attributes */
438 	if (driver) {
439 	    sprintf(buf2, "insert into %s values ( ", table);
440 	    db_set_string(&sql, buf2);
441 
442 	    if (catcol < 0) {
443 		sprintf(buf2, "%d, ", cat);
444 		db_append_string(&sql, buf2);
445 	    }
446 
447 	    for (i = 0; i < ntokens; i++) {
448 		G_chop(tokens[i]);
449 		if (i > 0)
450 		    db_append_string(&sql, ", ");
451 
452 		if (strlen(tokens[i]) > 0) {
453 		    if (coltype[i] == DB_C_TYPE_INT ||
454 			coltype[i] == DB_C_TYPE_DOUBLE) {
455 			if (G_projection() == PROJECTION_LL &&
456 			    (i == xcol || i == ycol)) {
457 			    if (i == xcol)
458 				sprintf(buf2, "%.15g", x);
459 			    else
460 				sprintf(buf2, "%.15g", y);
461 			}
462 			else
463 			    sprintf(buf2, "%s", tokens[i]);
464 		    }
465 		    else {
466 			db_set_string(&val, tokens[i]);
467 			/* TODO: strip leading and trailing "quotes" from input string */
468 			db_double_quote_string(&val);
469 			sprintf(buf2, "'%s'", db_get_string(&val));
470 		    }
471 		}
472 		else {
473 		    sprintf(buf2, "null");
474 		}
475 		db_append_string(&sql, buf2);
476 	    }
477 	    db_append_string(&sql, ")");
478 	    G_debug(3, "%s", db_get_string(&sql));
479 
480 	    if (db_execute_immediate(driver, &sql) != DB_OK) {
481 		G_fatal_error(_("Unable to insert new record: %s"),
482 			      db_get_string(&sql));
483 	    }
484 	}
485 
486 	G_free_tokens(tokens);
487     }
488     G_percent(nrows, nrows, 2);
489 
490     return 0;
491 }
492