1 /*--------------------------------------------------------------------
2 *
3 * Copyright (c) 1991-2021 by the GMT Team (https://www.generic-mapping-tools.org/team.html)
4 * See LICENSE.TXT file for copying and redistribution conditions.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; version 3 or any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * Contact info: www.generic-mapping-tools.org
16 *--------------------------------------------------------------------*/
17 /*
18 * Table input/output in GMT can be either ASCII or binary (where supported)
19 * and ASCII tables may consist of single or multiple segments. When the
20 * latter is the case usually there is a -M option to signal this case.
21 * The structure GMT_IO holds parameters that are used during the reading
22 * and processing of ASCII tables.
23 *
24 * Author: Paul Wessel
25 * Date: 15-NOV-2009
26 * Version: 6 API
27 *
28 */
29
/*!
 * \file gmt_io.h
 * \brief Enums, macros and structures used for GMT table input/output
 */
34
35 #ifndef GMT_IO_H
36 #define GMT_IO_H
37
38 #ifdef HAVE_SETLOCALE
39 # include <locale.h>
40 #endif
41
/*!
 * Return the characters that may separate data columns in an ASCII record.
 *
 * The full set is comma, semicolon, space and tab.  The comma is left out
 * when skip_comma is nonzero, or (only if HAVE_SETLOCALE is defined) when the
 * current locale reports a comma as its decimal point.
 *
 * \param skip_comma	Nonzero to always exclude the comma from the set.
 * \return		Pointer into a static string; it must not be modified or freed.
 */
static inline const char* __gmt_token_separators (unsigned int skip_comma) {
	static const char separators[] = ",; \t";
	if (skip_comma)	/* Must be honored even when HAVE_SETLOCALE is not defined */
		return separators + 1;	/* Omit comma */
#ifdef HAVE_SETLOCALE
	/* A comma that serves as decimal point cannot also separate columns */
	if (strcmp (localeconv()->decimal_point, ",") == 0)
		return separators + 1;	/* Omit comma */
#endif
	return separators;
}
#define GMT_TOKEN_SEPARATORS		__gmt_token_separators(0)	/* Data columns may be separated by any of these characters */
#define GMT_TOKEN_SEPARATORS_PSTEXT	__gmt_token_separators(1)	/* No comma if pstext and fonts are in input records */
53
/* Must add M, m, E, Z, and/or S to the common option processing list.
 * The macro expands to its argument unchanged (no parentheses are added). */
#define GMT_OPT(option) option
56
/*! Three different i/o status: unused, actively using, or used */
enum GMT_enum_status {
	GMT_IS_UNUSED = 0,	/* Nothing has been read from or written to this resource yet */
	GMT_IS_USING  = 1,	/* Reading from or writing to this file has started */
	GMT_IS_USED   = 2	/* Reading from or writing to this file is finished */
};
62
/*! There are three GMT/OGR status values */
enum GMT_ogr_status {
	GMT_OGR_UNKNOWN = -1,	/* Not enough records have been parsed to know yet */
	GMT_OGR_FALSE   =  0,	/* This is NOT a GMT/OGR file */
	GMT_OGR_TRUE    =  1	/* This is a GMT/OGR file */
};
68
/* True if segment S is flagged as a hole, either directly via S->pol_mode or
 * via its (optional) S->ogr->pol_mode.  S must be a pointer; it is evaluated
 * more than once, so pass an expression without side effects.  The argument is
 * parenthesized so that expressions such as &seg or seg_array + i work. */
#define gmt_M_polygon_is_hole(S) ((S)->pol_mode == GMT_IS_HOLE || ((S)->ogr && (S)->ogr->pol_mode == GMT_IS_HOLE))
70
/*! Codes for aspatial association with segment header options: */
enum GMT_enum_segopt {
	GMT_IS_D = -1,	/*!< -D */
	GMT_IS_G = -2,	/*!< -G */
	GMT_IS_I = -3,	/*!< -I */
	GMT_IS_L = -4,	/*!< -L */
	GMT_IS_T = -5,	/*!< -T */
	GMT_IS_W = -6,	/*!< -W */
	GMT_IS_Z = -7	/*!< -Z */
};
80
/* Macros to simplify check for return status.
 * Each macro takes a pointer C and tests C->current.io.status against one of
 * the GMT_IO_* values.  The argument is parenthesized so that any pointer
 * expression (e.g., &ctrl or ctrl_array + i) may be passed.  Note that
 * gmt_M_rec_is_data evaluates C twice. */
#define gmt_M_rec_is_table_header(C)	((C)->current.io.status & GMT_IO_TABLE_HEADER)
#define gmt_M_rec_is_segment_header(C)	((C)->current.io.status & GMT_IO_SEGMENT_HEADER)
#define gmt_M_rec_is_any_header(C)	((C)->current.io.status & GMT_IO_ANY_HEADER)
#define gmt_M_rec_is_error(C)		((C)->current.io.status & GMT_IO_MISMATCH)
#define gmt_M_rec_is_eof(C)		((C)->current.io.status & GMT_IO_EOF)
#define gmt_M_rec_is_nan(C)		((C)->current.io.status & GMT_IO_NAN)
#define gmt_M_rec_is_gap(C)		((C)->current.io.status & GMT_IO_GAP)
#define gmt_M_rec_is_new_segment(C)	((C)->current.io.status & GMT_IO_NEW_SEGMENT)
#define gmt_M_rec_is_line_break(C)	((C)->current.io.status & GMT_IO_LINE_BREAK)
#define gmt_M_rec_is_file_break(C)	((C)->current.io.status & GMT_IO_NEXT_FILE)
/* A data record has status exactly 0 or exactly GMT_IO_NAN */
#define gmt_M_rec_is_data(C)		((C)->current.io.status == 0 || (C)->current.io.status == GMT_IO_NAN)
93
94 /* Get current setting for in/out columns */
95
/*! Types of possible column entries in a file: */
enum gmt_col_enum {
	GMT_IS_NAN          = 0,				/* Returned by gmt_scanf routines when read fails */
	GMT_IS_FLOAT        = 1 << 0,				/* Generic (double) data type, no special format */
	GMT_IS_LAT          = 1 << 1,
	GMT_IS_LON          = 1 << 2,
	GMT_IS_GEO          = GMT_IS_LAT | GMT_IS_LON,		/* data type is either Lat or Lon */
	GMT_IS_RELTIME      = 1 << 3,				/* For I/O of data in user units */
	GMT_IS_ABSTIME      = 1 << 4,				/* For I/O of data in calendar+clock units */
	GMT_IS_RATIME       = GMT_IS_RELTIME | GMT_IS_ABSTIME,	/* To see if time is either Relative or Absolute */
	GMT_IS_ARGTIME      = 1 << 5,				/* To invoke gmt_scanf_argtime() */
	GMT_IS_DURATION     = 1 << 6,				/* For elapsed time */
	GMT_IS_DIMENSION    = 1 << 7,				/* A float with [optional] unit suffix, e.g., 7.5c, 0.4i; convert to inch */
	GMT_IS_GEODIMENSION = 1 << 8,				/* A float with [optional] geo-distance unit suffix, e.g., 7.5n, 0.4d; convert to km */
	GMT_IS_AZIMUTH      = 1 << 9,				/* An angle to be converted via map projection to angle on map */
	GMT_IS_ANGLE        = 1 << 10,				/* An angle to be used as is */
	GMT_IS_STRING       = 1 << 11,				/* A text argument [internally used, not via -f] */
	GMT_IS_UNKNOWN      = 1 << 12				/* Input type is not knowable without -f */
};
114
/*! Various ways to report longitudes */
enum GMT_lon_enum {
	GMT_IS_GIVEN_RANGE = 0,		/* Report lon as is */
	GMT_IS_0_TO_P360_RANGE,		/* (1) Report 0 <= lon <= 360 */
	GMT_IS_0_TO_P360,		/* (2) Report 0 <= lon < 360 */
	GMT_IS_M360_TO_0_RANGE,		/* (3) Report -360 <= lon <= 0 */
	GMT_IS_M360_TO_0,		/* (4) Report -360 < lon <= 0 */
	GMT_IS_M180_TO_P180_RANGE,	/* (5) Report -180 <= lon <= +180 */
	GMT_IS_M180_TO_P180,		/* (6) Report -180 <= lon < +180 */
	GMT_IS_M180_TO_P270_RANGE,	/* (7) Report -180 <= lon < +270 [GSHHG only] */
	GMT_IGNORE_RANGE = 99		/* Do not adjust longitudes at all */
};
126
/*! How to handle NaNs in records */
enum GMT_io_nan_enum {
	GMT_IO_NAN_SKIP = 1 << 0,	/* -s[cols] : Skip records with z == NaN in selected cols [col=2 only] */
	GMT_IO_NAN_KEEP = 1 << 1,	/* -s+r     : Skip records with z != NaN */
	GMT_IO_NAN_ANY  = 1 << 2	/* -s+a     : Skip records with at least one NaN [skip records with all cols = NaN] */
};
132
/* Use the POSIX functions ftello() and fseeko(), which represent the
 * position using the off_t type, when the build reports them available: */
#if defined(HAVE_FSEEKO)
#	define fseek fseeko
#endif

#if defined(HAVE_FTELLO)
#	define ftell ftello
#endif

/* Windows 64-bit file access: needs both _fseeki64 and _ftelli64.
 * If neither this pair nor SIZEOF_OFF_T is defined, off_t falls back to long. */
#if defined(HAVE__FSEEKI64) && defined(HAVE__FTELLI64)
#	define fseek _fseeki64
#	define ftell _ftelli64
#	if !defined(SIZEOF_OFF_T)
	typedef __int64 off_t;
#	else
#		define off_t __int64
#	endif /* SIZEOF_OFF_T */
#elif !defined(SIZEOF_OFF_T) /* HAVE__FSEEKI64 && HAVE__FTELLI64 */
	typedef long off_t;
#endif /* HAVE__FSEEKI64 && HAVE__FTELLI64 */
155
/* Thin aliases that forward their arguments unchanged to the C stdio calls */
#define gmt_M_fputs(text,stream)		fputs(text,stream)
#define gmt_M_fread(buf,size,count,stream)	fread(buf,size,count,stream)
#define gmt_M_fwrite(buf,size,count,stream)	fwrite(buf,size,count,stream)
#define gmt_M_rewind(fp)			rewind(fp)
160
161 /* Low-level structures used internally */
162
/*! Counting parameters needed to determine proper longitude min/max range */
struct GMT_QUAD {
	uint64_t quad[4];	/* Keeps track if a longitude fell in these quadrants */
	unsigned int range[2];	/* The format for reporting longitude */
	double min[2];		/* Min values in either -180/180 or 0/360 counting */
	double max[2];		/* Max values in either -180/180 or 0/360 counting */
};
168
169 struct GMT_CLOCK_IO {
170 bool skip; /* Only true if a format string was pass as NULL */
171 double f_sec_to_int; /* Scale to convert 0.xxx seconds to integer xxx (used for formatting) */
172 int order[3]; /* The relative order of hour, mn, sec in input clock string (-ve if unused) */
173 unsigned int n_sec_decimals; /* Number of digits in decimal seconds (0 for whole seconds) */
174 bool compact; /* true if we do not want leading zeros in items (e.g., 03) */
175 bool twelve_hr_clock; /* true if we are doing am/pm on output */
176 char ampm_suffix[2][GMT_LEN8]; /* Holds the strings to append am or pm */
177 char format[GMT_LEN64]; /* Actual C format used to output clock */
178 char delimiter[2][2]; /* Delimiter strings in clock, e.g. ":" */
179 };
180
181 struct GMT_DATE_IO {
182 bool skip; /* Only true if a format string was pass as NULL */
183 bool watch; /* Only true if input format has month last and is monthname */
184 int item_order[4]; /* The sequence year, month, day, day-of-year in input calendar string (-ve if unused) */
185 int item_pos[4]; /* Which position year, month, day, day-of-year has in calendar string (-ve if unused) */
186 bool Y2K_year; /* true if we have 2-digit years */
187 bool truncated_cal_is_ok; /* true if we have YMD or YJ order so smallest unit is to the right */
188 bool iso_calendar; /* true if we do ISO week calendar */
189 bool day_of_year; /* true if we do day-of-year rather than month/day */
190 bool mw_text; /* true if we must plot the month name or Week rather than a numeral */
191 bool compact; /* true if we do not want leading zeros in items (e.g., 03) */
192 char format[GMT_LEN64]; /* Actual C format used to input/output date */
193 char delimiter[2][2]; /* Delimiter strings in date, e.g. "-" */
194 };
195
196 struct GMT_GEO_IO { /* For geographic output and plotting */
197 double f_sec_to_int; /* Scale to convert 0.xxx seconds to integer xxx (used for formatting) */
198 unsigned int n_sec_decimals; /* Number of digits in decimal seconds (0 for whole seconds) */
199 unsigned int range; /* 0 for 0/360, 1 for -360/0, 2 for -180/+180 */
200 unsigned int wesn; /* 1 if we want sign encoded with suffix W, E, S, N, 2 if also want space before letter */
201 int order[3]; /* The relative order of degree, minute, seconds in form (-ve if unused) */
202 bool decimal; /* true if we want to use the FORMAT_FLOAT_OUT for decimal degrees only */
203 bool no_sign; /* true if we want absolute values (plot only) */
204 char x_format[GMT_LEN64]; /* Actual C format used to plot/output longitude */
205 char y_format[GMT_LEN64]; /* Actual C format used to plot/output latitude */
206 char delimiter[2][2]; /* Delimiter strings in date, e.g. "-" */
207 };
208
/*! Used by -i and input parsing */
struct GMT_COL_INFO {
	unsigned int col;	/* The column number in the order requested via -i */
	unsigned int order;	/* The initial order (0,1,...) but this will be sorted on col */
	unsigned int convert;	/* 2 if we must convert the data by log10, 1 if scale, offset */
	double scale;		/* Multiplier for raw in value */
	double offset;		/* Offset applied after multiplier */
};
216
217 struct GMT_COL_TYPE { /* Used by -b for binary formatting */
218 unsigned int type; /* Data type e.g., GMT_FLOAT */
219 off_t skip; /* Rather than read/write an item, jump |skip| bytes before (-ve) or after (+ve) read/write */
220 int (*io) (struct GMT_CTRL *, FILE *, uint64_t, double *); /* Pointer to the correct read or write function given type/swab */
221 };
222
/*! For selecting row ranges via -q */
struct GMT_ROW_RANGE {
	int64_t first;	/* First member of the range triplet */
	int64_t last;	/* Second member of the range triplet */
	int64_t inc;	/* Third member of the range triplet */
};
227
/*! For selecting data ranges via -q */
struct GMT_DATA_RANGE {
	double first;	/* First member of the range pair */
	double last;	/* Second member of the range pair */
};
232
233 struct GMT_IO { /* Used to process input data records */
234 void * (*input) (struct GMT_CTRL *, FILE *, uint64_t *, int *); /* Pointer to function reading ASCII or binary tables */
235 int (*output) (struct GMT_CTRL *, FILE *, uint64_t, double *, char *); /* Pointer to function writing ASCII or binary tables */
236 int (*read_item) (struct GMT_CTRL *, FILE *, uint64_t, double *); /* Pointer to function reading 1-col z tables in grd2xyz */
237 int (*write_item) (struct GMT_CTRL *, FILE *, uint64_t, double *); /* Pointer to function writing 1-col z tables in xyz2grd */
238 bool (*ogr_parser) (struct GMT_CTRL *, char *); /* Set to handle either header or data OGR records */
239 const char *scan_separators; /* List of characters that separates columns in ascii records */
240
241 unsigned int pad[4]; /* pad[0] = west, pad[1] = east, pad[2] = south, pad[3] = north */
242 unsigned int inc_code[2];
243 double curr_rec[GMT_MAX_COLUMNS]; /* The most recently processed data record */
244 double prev_rec[GMT_MAX_COLUMNS]; /* The previous data record */
245
246 bool multi_segments[2]; /* true if current ASCII input/output file has multiple segments */
247 bool skip_headers_on_outout; /* true when gmtconvert -T is set [or possibly other similar actions in the future] */
248 bool skip_bad_records; /* true if records where x and/or y are NaN or Inf */
249 bool give_report; /* true if functions should report how many bad records were skipped */
250 bool skip_duplicates; /* true if we should ignore duplicate x,y records */
251 bool variable_in_columns; /* true if we are reading ASCII records with variable numbers of columns */
252 bool need_previous; /* true if when parsing a record we need access to previous record values (e.g., for gap or duplicate checking) */
253 bool has_previous_rec; /* true if we have the previous record for this segment */
254 bool warn_geo_as_cartesion; /* true if we should warn if we read a record with geographic data while the expected format has not been set (i.e., no -J or -fg) */
255 bool first_rec; /* true when reading very first data record in a dataset */
256 bool trailing_text[2]; /* Default is to process training text unless turned off via -i, -o */
257 bool refreshed[2]; /* true after calling the hash_refresh function the first time, for hash and info, respectively */
258 bool new_data_list; /* true after when a server refresh yields an updated gmt_data_server.txt */
259 bool internet_error; /* true after failing to get hash table due to time-out */
260 bool grid_padding; /* If true we try to read two extra rows/cols from grids for BC purposes */
261 bool leave_as_jp2; /* If true we do not convert downloaded JP2 grids to NC right away, but as needed */
262 uint64_t seg_no; /* Number of current multi-segment in entire data set */
263 uint64_t seg_in_tbl_no; /* Number of current multi-segment in current table */
264 uint64_t n_clean_rec; /* Number of clean records read (not including skipped records or comments or blanks) */
265 uint64_t n_bad_records; /* Number of bad records encountered during i/o */
266 unsigned int tbl_no; /* Number of current table in entire data set */
267 unsigned int io_nan_ncols; /* Number of columns to consider for -s option */
268 unsigned int record_type[2]; /* Either GMT_READ|WRITE_DATA (0), GMT_READ|WRITE_TEXT (1), or GMT_READ|WRITE_MIXED (2) (for input and output) */
269 unsigned int n_numerical_cols; /* As it says */
270 unsigned int max_cols_to_read; /* For ascii input [all] */
271 unsigned int n_row_ranges[2]; /* How many row ranges given in -q */
272 enum GMT_ogr_status ogr; /* Tells us if current input source has OGR/GMT metadata (GMT_OGR_TRUE) or not (GMT_OGR_FALSE) or not set (GMT_OGR_UNKNOWN) */
273 unsigned int status; /* 0 All is ok
274 1 Current record is segment header
275 2 Mismatch between actual and expected fields
276 4 EOF
277 8 NaNs encountered in first 2/3 cols */
278 uint64_t rec_no; /* Number of current records (counts headers etc) in entire data set */
279 uint64_t rec_in_tbl_no; /* Number of current record (counts headers etc) in current table */
280 uint64_t data_record_number_in_set[2]; /* Number of current valid data record number in the whole dataset, for input and output. Headers not counted. */
281 uint64_t data_record_number_in_tbl[2]; /* Number of current valid data record number in the current table, for input and output. Headers not counted. */
282 uint64_t data_record_number_in_seg[2]; /* Number of current valid data record number in the current segment, for input and output. Headers not counted. */
283 int64_t curr_pos[2][4]; /* Keep track of current input/output table, segment, row, and table headers (for rec-by-rec action) */
284 char r_mode[4]; /* Current file opening mode for reading (r or rb) */
285 char w_mode[4]; /* Current file opening mode for writing (w or wb) */
286 char a_mode[4]; /* Current file append mode for writing (a+ or ab+) */
287 char curr_text[GMT_BUFSIZ]; /* Current ASCII record as it was read */
288 char curr_trailing_text[GMT_BUFSIZ]; /* Current text portion of current record (or NULL) */
289 char segment_header[GMT_BUFSIZ]; /* Current ASCII segment header */
290 char filename[2][PATH_MAX]; /* Current filenames (or <stdin>/<stdout>) */
291 #ifdef HAVE_GDAL
292 char tempfile[PATH_MAX]; /* Temporary file used to read - should be removed when closed */
293 #endif
294 char col_set[2][GMT_MAX_COLUMNS]; /* Keeps track of which columns have had their type set */
295 char *o_format[GMT_MAX_COLUMNS]; /* Custom output ASCII format to overrule format_float_out */
296 int ncid; /* NetCDF file ID (when opening netCDF file) */
297 int nvars; /* Number of requested variables in netCDF file */
298 uint64_t ncols; /* Number of total columns in netCDF file */
299 size_t t_index[GMT_MAX_COLUMNS][5]; /* Indices for cross-sections (netCDF only) */
300 size_t count[GMT_MAX_COLUMNS][5]; /* Count used for cross-sections (netCDF only) */
301 size_t ndim; /* Length of the column dimension */
302 size_t nrec; /* Record count */
303 struct GMT_DATE_IO date_input; /* Has all info on how to decode input dates */
304 struct GMT_DATE_IO date_output; /* Has all info on how to write output dates */
305 struct GMT_CLOCK_IO clock_input; /* Has all info on how to decode input clocks */
306 struct GMT_CLOCK_IO clock_output; /* Has all info on how to write output clocks */
307 struct GMT_GEO_IO geo; /* Has all the info on how to write geographic coordinates */
308 bool skip_if_NaN[GMT_MAX_COLUMNS]; /* true if column j cannot be NaN and we must skip the record */
309 bool col_skip[GMT_MAX_COLUMNS]; /* true of input column is to be ignored [Default reads all columns, but see -i] */
310 unsigned int col_type[2][GMT_MAX_COLUMNS]; /* Type of column on input and output: Time, geographic, etc, see GMT_IS_<TYPE> */
311 unsigned int io_nan_col[GMT_MAX_COLUMNS]; /* Array of columns to consider for -s option ir true */
312 struct GMT_COL_INFO col[2][GMT_MAX_COLUMNS]; /* Order of columns on input and output unless 0,1,2,3,... */
313 struct GMT_COL_TYPE fmt[2][GMT_MAX_COLUMNS]; /* Formatting information for binary data */
314 struct GMT_ROW_RANGE row_range[2][GMT_MAX_RANGES]; /* One or more ranges for input or output rows */
315 struct GMT_DATA_RANGE data_range[2][GMT_MAX_RANGES]; /* One or more ranges for input or output times */
316 struct GMT_OGR *OGR; /* Pointer to GMT/OGR info used during reading */
317 struct GMT_RECORD record; /* Current record with pointers to data columns and text */
318 double *nc_xarray, *nc_yarray; /* For grids with variable x,y arrays */
319 enum GMT_time_period cycle_operator;
320 bool cycle_interval; /* true for annual and weekly cycles */
321 int64_t cycle_col; /* The input column with periodic time [-1 meaning no such thing] */
322 double cycle_min; /* Min cyclical time requested via -R */
323 double cycle_max; /* Max cyclical time requested via -R */
324 double cycle_range; /* A full period of time */
325 double cycle_period; /* Custom period */
326 double cycle_phase; /* Custom phase */
327 /* The remainder are just pointers to memory allocated elsewhere */
328 int *grpid, *varid; /* Arrays of group and variable IDs (netCDF only) */
329 double *scale_factor; /* Array of scale factors (netCDF only) */
330 double *add_offset; /* Array of offsets (netCDF only) */
331 double *missing_value; /* Array of missing values (netCDF only) */
332 };
333
334 struct GMT_Z_IO { /* Used when processing z(x,y) table input when (x,y) is implicit */
335 bool swab; /* true if we must swap byte-order */
336 bool binary; /* true if we are reading/writing binary data */
337 bool input; /* true if we are reading, false if we are writing */
338 int x_step; /* +1 if logical x values increase to right, else -1 */
339 int y_step; /* +1 if logical y values increase upwards, else -1 */
340 unsigned int x_missing; /* 1 if a periodic (right) column is implicit (i.e., not stored) */
341 unsigned int y_missing; /* 1 if a periodic (top) row is implicit (i.e., not stored) */
342 unsigned int format; /* Either GMT_IS_COL_FORMAT or GMT_IS_ROW_FORMAT */
343 unsigned int x_period; /* length of a row in the input data ( <= n_columns, see x_missing) */
344 unsigned int y_period; /* length of a col in the input data ( <= n_rows, see y_missing) */
345 unsigned int start_col; /* First logical column in file */
346 unsigned int start_row; /* First logical row in file */
347 unsigned int gmt_i; /* Current column number in the GMT registered grid */
348 unsigned int gmt_j; /* Current row number in the GMT registered grid */
349 uint64_t n_expected; /* Number of data element expected to be read */
350 off_t skip; /* Number of bytes to skip before reading data */
351 uint64_t (*get_gmt_ij) (struct GMT_Z_IO *, struct GMT_GRID *, uint64_t); /* Pointer to function that converts running number to GMT ij */
352 };
353
354 struct GMT_PARSE_Z_IO { /* -Z[<flags>] */
355 bool active; /* true if selected */
356 bool not_grid; /* false if binary data file is a grid so organization matters */
357 bool repeat[2]; /* true if periodic in x|y and repeating row/col is missing */
358 enum GMT_swap_direction swab; /* k_swap_none = no byte swapping, k_swap_inswaps input, k_swap_out swaps output, combine to swap both */
359 off_t skip; /* Initial bytes to skip before reading */
360 char type; /* Data type flag A|a|c|u|h|H|i|I|l|L|f|d */
361 char format[2]; /* 2-char code describing row/col organization for grids */
362 };
363
364 struct GMT_PLOT_CALCLOCK {
365 struct GMT_DATE_IO date;
366 struct GMT_CLOCK_IO clock;
367 struct GMT_GEO_IO geo;
368 };
369
370 /* For the GMT_GRID container, see gmt_grdio.h */
371
372 #endif /* GMT_IO_H */
373