1 /*-------------------------------------------------------------------------
2 *
3 * tzparser.c
4 * Functions for parsing timezone offset files
5 *
6 * Note: this code is invoked from the check_hook for the GUC variable
7 * timezone_abbreviations. Therefore, it should report problems using
8 * GUC_check_errmsg() and related functions, and try to avoid throwing
9 * elog(ERROR). This is not completely bulletproof at present --- in
10 * particular out-of-memory will throw an error. Could probably fix with
11 * PG_TRY if necessary.
12 *
13 *
14 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15 * Portions Copyright (c) 1994, Regents of the University of California
16 *
17 * IDENTIFICATION
18 * src/backend/utils/misc/tzparser.c
19 *
20 *-------------------------------------------------------------------------
21 */
22
23 #include "postgres.h"
24
25 #include <ctype.h>
26
27 #include "miscadmin.h"
28 #include "storage/fd.h"
29 #include "utils/guc.h"
30 #include "utils/memutils.h"
31 #include "utils/tzparser.h"
32
33
/* Delimiter characters handed to strtok() when splitting a line into tokens */
#define WHITESPACE " \t\n\r"

/* Forward declarations of the file-local helpers defined later in this file */
static bool validateTzEntry(tzEntry *tzentry);
static bool splitTzLine(const char *filename, int lineno,
			char *line, tzEntry *tzentry);
static int addToArray(tzEntry **base, int *arraysize, int n,
		   tzEntry *entry, bool override);
static int ParseTzFile(const char *filename, int depth,
			tzEntry **base, int *arraysize, int n);
43
44
45 /*
46 * Apply additional validation checks to a tzEntry
47 *
48 * Returns TRUE if OK, else false
49 */
50 static bool
validateTzEntry(tzEntry * tzentry)51 validateTzEntry(tzEntry *tzentry)
52 {
53 unsigned char *p;
54
55 /*
56 * Check restrictions imposed by datetkntbl storage format (see
57 * datetime.c)
58 */
59 if (strlen(tzentry->abbrev) > TOKMAXLEN)
60 {
61 GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62 tzentry->abbrev, TOKMAXLEN,
63 tzentry->filename, tzentry->lineno);
64 return false;
65 }
66
67 /*
68 * Sanity-check the offset: shouldn't exceed 14 hours
69 */
70 if (tzentry->offset > 14 * 60 * 60 ||
71 tzentry->offset < -14 * 60 * 60)
72 {
73 GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74 tzentry->offset,
75 tzentry->filename, tzentry->lineno);
76 return false;
77 }
78
79 /*
80 * Convert abbrev to lowercase (must match datetime.c's conversion)
81 */
82 for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83 *p = pg_tolower(*p);
84
85 return true;
86 }
87
88 /*
89 * Attempt to parse the line as a timezone abbrev spec
90 *
91 * Valid formats are:
92 * name zone
93 * name offset dst
94 *
95 * Returns TRUE if OK, else false; data is stored in *tzentry
96 */
97 static bool
splitTzLine(const char * filename,int lineno,char * line,tzEntry * tzentry)98 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 {
100 char *abbrev;
101 char *offset;
102 char *offset_endptr;
103 char *remain;
104 char *is_dst;
105
106 tzentry->lineno = lineno;
107 tzentry->filename = filename;
108
109 abbrev = strtok(line, WHITESPACE);
110 if (!abbrev)
111 {
112 GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
113 filename, lineno);
114 return false;
115 }
116 tzentry->abbrev = pstrdup(abbrev);
117
118 offset = strtok(NULL, WHITESPACE);
119 if (!offset)
120 {
121 GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
122 filename, lineno);
123 return false;
124 }
125
126 /* We assume zone names don't begin with a digit or sign */
127 if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
128 {
129 tzentry->zone = NULL;
130 tzentry->offset = strtol(offset, &offset_endptr, 10);
131 if (offset_endptr == offset || *offset_endptr != '\0')
132 {
133 GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
134 filename, lineno);
135 return false;
136 }
137
138 is_dst = strtok(NULL, WHITESPACE);
139 if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
140 {
141 tzentry->is_dst = true;
142 remain = strtok(NULL, WHITESPACE);
143 }
144 else
145 {
146 /* there was no 'D' dst specifier */
147 tzentry->is_dst = false;
148 remain = is_dst;
149 }
150 }
151 else
152 {
153 /*
154 * Assume entry is a zone name. We do not try to validate it by
155 * looking up the zone, because that would force loading of a lot of
156 * zones that probably will never be used in the current session.
157 */
158 tzentry->zone = pstrdup(offset);
159 tzentry->offset = 0;
160 tzentry->is_dst = false;
161 remain = strtok(NULL, WHITESPACE);
162 }
163
164 if (!remain) /* no more non-whitespace chars */
165 return true;
166
167 if (remain[0] != '#') /* must be a comment */
168 {
169 GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
170 filename, lineno);
171 return false;
172 }
173 return true;
174 }
175
176 /*
177 * Insert entry into sorted array
178 *
179 * *base: base address of array (changeable if must enlarge array)
180 * *arraysize: allocated length of array (changeable if must enlarge array)
181 * n: current number of valid elements in array
182 * entry: new data to insert
183 * override: TRUE if OK to override
184 *
185 * Returns the new array length (new value for n), or -1 if error
186 */
187 static int
addToArray(tzEntry ** base,int * arraysize,int n,tzEntry * entry,bool override)188 addToArray(tzEntry **base, int *arraysize, int n,
189 tzEntry *entry, bool override)
190 {
191 tzEntry *arrayptr;
192 int low;
193 int high;
194
195 /*
196 * Search the array for a duplicate; as a useful side effect, the array is
197 * maintained in sorted order. We use strcmp() to ensure we match the
198 * sort order datetime.c expects.
199 */
200 arrayptr = *base;
201 low = 0;
202 high = n - 1;
203 while (low <= high)
204 {
205 int mid = (low + high) >> 1;
206 tzEntry *midptr = arrayptr + mid;
207 int cmp;
208
209 cmp = strcmp(entry->abbrev, midptr->abbrev);
210 if (cmp < 0)
211 high = mid - 1;
212 else if (cmp > 0)
213 low = mid + 1;
214 else
215 {
216 /*
217 * Found a duplicate entry; complain unless it's the same.
218 */
219 if ((midptr->zone == NULL && entry->zone == NULL &&
220 midptr->offset == entry->offset &&
221 midptr->is_dst == entry->is_dst) ||
222 (midptr->zone != NULL && entry->zone != NULL &&
223 strcmp(midptr->zone, entry->zone) == 0))
224 {
225 /* return unchanged array */
226 return n;
227 }
228 if (override)
229 {
230 /* same abbrev but something is different, override */
231 midptr->zone = entry->zone;
232 midptr->offset = entry->offset;
233 midptr->is_dst = entry->is_dst;
234 return n;
235 }
236 /* same abbrev but something is different, complain */
237 GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
238 entry->abbrev);
239 GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
240 midptr->filename, midptr->lineno,
241 entry->filename, entry->lineno);
242 return -1;
243 }
244 }
245
246 /*
247 * No match, insert at position "low".
248 */
249 if (n >= *arraysize)
250 {
251 *arraysize *= 2;
252 *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
253 }
254
255 arrayptr = *base + low;
256
257 memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
258
259 memcpy(arrayptr, entry, sizeof(tzEntry));
260
261 return n + 1;
262 }
263
264 /*
265 * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
266 *
267 * filename: user-specified file name (does not include path)
268 * depth: current recursion depth
269 * *base: array for results (changeable if must enlarge array)
270 * *arraysize: allocated length of array (changeable if must enlarge array)
271 * n: current number of valid elements in array
272 *
273 * Returns the new array length (new value for n), or -1 if error
274 */
275 static int
ParseTzFile(const char * filename,int depth,tzEntry ** base,int * arraysize,int n)276 ParseTzFile(const char *filename, int depth,
277 tzEntry **base, int *arraysize, int n)
278 {
279 char share_path[MAXPGPATH];
280 char file_path[MAXPGPATH];
281 FILE *tzFile;
282 char tzbuf[1024];
283 char *line;
284 tzEntry tzentry;
285 int lineno = 0;
286 bool override = false;
287 const char *p;
288
289 /*
290 * We enforce that the filename is all alpha characters. This may be
291 * overly restrictive, but we don't want to allow access to anything
292 * outside the timezonesets directory, so for instance '/' *must* be
293 * rejected.
294 */
295 for (p = filename; *p; p++)
296 {
297 if (!isalpha((unsigned char) *p))
298 {
299 /* at level 0, just use guc.c's regular "invalid value" message */
300 if (depth > 0)
301 GUC_check_errmsg("invalid time zone file name \"%s\"",
302 filename);
303 return -1;
304 }
305 }
306
307 /*
308 * The maximal recursion depth is a pretty arbitrary setting. It is hard
309 * to imagine that someone needs more than 3 levels so stick with this
310 * conservative setting until someone complains.
311 */
312 if (depth > 3)
313 {
314 GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
315 filename);
316 return -1;
317 }
318
319 get_share_path(my_exec_path, share_path);
320 snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
321 share_path, filename);
322 tzFile = AllocateFile(file_path, "r");
323 if (!tzFile)
324 {
325 /*
326 * Check to see if the problem is not the filename but the directory.
327 * This is worth troubling over because if the installation share/
328 * directory is missing or unreadable, this is likely to be the first
329 * place we notice a problem during postmaster startup.
330 */
331 int save_errno = errno;
332 DIR *tzdir;
333
334 snprintf(file_path, sizeof(file_path), "%s/timezonesets",
335 share_path);
336 tzdir = AllocateDir(file_path);
337 if (tzdir == NULL)
338 {
339 GUC_check_errmsg("could not open directory \"%s\": %m",
340 file_path);
341 GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
342 my_exec_path);
343 return -1;
344 }
345 FreeDir(tzdir);
346 errno = save_errno;
347
348 /*
349 * otherwise, if file doesn't exist and it's level 0, guc.c's
350 * complaint is enough
351 */
352 if (errno != ENOENT || depth > 0)
353 GUC_check_errmsg("could not read time zone file \"%s\": %m",
354 filename);
355
356 return -1;
357 }
358
359 while (!feof(tzFile))
360 {
361 lineno++;
362 if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
363 {
364 if (ferror(tzFile))
365 {
366 GUC_check_errmsg("could not read time zone file \"%s\": %m",
367 filename);
368 return -1;
369 }
370 /* else we're at EOF after all */
371 break;
372 }
373 if (strlen(tzbuf) == sizeof(tzbuf) - 1)
374 {
375 /* the line is too long for tzbuf */
376 GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377 filename, lineno);
378 return -1;
379 }
380
381 /* skip over whitespace */
382 line = tzbuf;
383 while (*line && isspace((unsigned char) *line))
384 line++;
385
386 if (*line == '\0') /* empty line */
387 continue;
388 if (*line == '#') /* comment line */
389 continue;
390
391 if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
392 {
393 /* pstrdup so we can use filename in result data structure */
394 char *includeFile = pstrdup(line + strlen("@INCLUDE"));
395
396 includeFile = strtok(includeFile, WHITESPACE);
397 if (!includeFile || !*includeFile)
398 {
399 GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
400 filename, lineno);
401 return -1;
402 }
403 n = ParseTzFile(includeFile, depth + 1,
404 base, arraysize, n);
405 if (n < 0)
406 return -1;
407 continue;
408 }
409
410 if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
411 {
412 override = true;
413 continue;
414 }
415
416 if (!splitTzLine(filename, lineno, line, &tzentry))
417 return -1;
418 if (!validateTzEntry(&tzentry))
419 return -1;
420 n = addToArray(base, arraysize, n, &tzentry, override);
421 if (n < 0)
422 return -1;
423 }
424
425 FreeFile(tzFile);
426
427 return n;
428 }
429
430 /*
431 * load_tzoffsets --- read and parse the specified timezone offset file
432 *
433 * On success, return a filled-in TimeZoneAbbrevTable, which must have been
434 * malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
435 * and friends to give details of the problem.
436 */
437 TimeZoneAbbrevTable *
load_tzoffsets(const char * filename)438 load_tzoffsets(const char *filename)
439 {
440 TimeZoneAbbrevTable *result = NULL;
441 MemoryContext tmpContext;
442 MemoryContext oldContext;
443 tzEntry *array;
444 int arraysize;
445 int n;
446
447 /*
448 * Create a temp memory context to work in. This makes it easy to clean
449 * up afterwards.
450 */
451 tmpContext = AllocSetContextCreate(CurrentMemoryContext,
452 "TZParserMemory",
453 ALLOCSET_SMALL_SIZES);
454 oldContext = MemoryContextSwitchTo(tmpContext);
455
456 /* Initialize array at a reasonable size */
457 arraysize = 128;
458 array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
459
460 /* Parse the file(s) */
461 n = ParseTzFile(filename, 0, &array, &arraysize, 0);
462
463 /* If no errors so far, let datetime.c allocate memory & convert format */
464 if (n >= 0)
465 {
466 result = ConvertTimeZoneAbbrevs(array, n);
467 if (!result)
468 GUC_check_errmsg("out of memory");
469 }
470
471 /* Clean up */
472 MemoryContextSwitchTo(oldContext);
473 MemoryContextDelete(tmpContext);
474
475 return result;
476 }
477