1 /*-------------------------------------------------------------------------
2 *
3 * tzparser.c
4 * Functions for parsing timezone offset files
5 *
6 * Note: this code is invoked from the check_hook for the GUC variable
7 * timezone_abbreviations. Therefore, it should report problems using
8 * GUC_check_errmsg() and related functions, and try to avoid throwing
9 * elog(ERROR). This is not completely bulletproof at present --- in
10 * particular out-of-memory will throw an error. Could probably fix with
11 * PG_TRY if necessary.
12 *
13 *
14 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
15 * Portions Copyright (c) 1994, Regents of the University of California
16 *
17 * IDENTIFICATION
18 * src/backend/utils/misc/tzparser.c
19 *
20 *-------------------------------------------------------------------------
21 */
22
23 #include "postgres.h"
24
25 #include <ctype.h>
26
27 #include "miscadmin.h"
28 #include "storage/fd.h"
29 #include "utils/guc.h"
30 #include "utils/memutils.h"
31 #include "utils/tzparser.h"
32
33
34 #define WHITESPACE " \t\n\r"
35
36 static bool validateTzEntry(tzEntry *tzentry);
37 static bool splitTzLine(const char *filename, int lineno,
38 char *line, tzEntry *tzentry);
39 static int addToArray(tzEntry **base, int *arraysize, int n,
40 tzEntry *entry, bool override);
41 static int ParseTzFile(const char *filename, int depth,
42 tzEntry **base, int *arraysize, int n);
43
44
45 /*
46 * Apply additional validation checks to a tzEntry
47 *
48 * Returns true if OK, else false
49 */
50 static bool
validateTzEntry(tzEntry * tzentry)51 validateTzEntry(tzEntry *tzentry)
52 {
53 unsigned char *p;
54
55 /*
56 * Check restrictions imposed by datetktbl storage format (see datetime.c)
57 */
58 if (strlen(tzentry->abbrev) > TOKMAXLEN)
59 {
60 GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
61 tzentry->abbrev, TOKMAXLEN,
62 tzentry->filename, tzentry->lineno);
63 return false;
64 }
65
66 /*
67 * Sanity-check the offset: shouldn't exceed 14 hours
68 */
69 if (tzentry->offset > 14 * 60 * 60 ||
70 tzentry->offset < -14 * 60 * 60)
71 {
72 GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
73 tzentry->offset,
74 tzentry->filename, tzentry->lineno);
75 return false;
76 }
77
78 /*
79 * Convert abbrev to lowercase (must match datetime.c's conversion)
80 */
81 for (p = (unsigned char *) tzentry->abbrev; *p; p++)
82 *p = pg_tolower(*p);
83
84 return true;
85 }
86
87 /*
88 * Attempt to parse the line as a timezone abbrev spec
89 *
90 * Valid formats are:
91 * name zone
92 * name offset dst
93 *
94 * Returns true if OK, else false; data is stored in *tzentry
95 */
96 static bool
splitTzLine(const char * filename,int lineno,char * line,tzEntry * tzentry)97 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
98 {
99 char *abbrev;
100 char *offset;
101 char *offset_endptr;
102 char *remain;
103 char *is_dst;
104
105 tzentry->lineno = lineno;
106 tzentry->filename = filename;
107
108 abbrev = strtok(line, WHITESPACE);
109 if (!abbrev)
110 {
111 GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
112 filename, lineno);
113 return false;
114 }
115 tzentry->abbrev = pstrdup(abbrev);
116
117 offset = strtok(NULL, WHITESPACE);
118 if (!offset)
119 {
120 GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
121 filename, lineno);
122 return false;
123 }
124
125 /* We assume zone names don't begin with a digit or sign */
126 if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
127 {
128 tzentry->zone = NULL;
129 tzentry->offset = strtol(offset, &offset_endptr, 10);
130 if (offset_endptr == offset || *offset_endptr != '\0')
131 {
132 GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
133 filename, lineno);
134 return false;
135 }
136
137 is_dst = strtok(NULL, WHITESPACE);
138 if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
139 {
140 tzentry->is_dst = true;
141 remain = strtok(NULL, WHITESPACE);
142 }
143 else
144 {
145 /* there was no 'D' dst specifier */
146 tzentry->is_dst = false;
147 remain = is_dst;
148 }
149 }
150 else
151 {
152 /*
153 * Assume entry is a zone name. We do not try to validate it by
154 * looking up the zone, because that would force loading of a lot of
155 * zones that probably will never be used in the current session.
156 */
157 tzentry->zone = pstrdup(offset);
158 tzentry->offset = 0;
159 tzentry->is_dst = false;
160 remain = strtok(NULL, WHITESPACE);
161 }
162
163 if (!remain) /* no more non-whitespace chars */
164 return true;
165
166 if (remain[0] != '#') /* must be a comment */
167 {
168 GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
169 filename, lineno);
170 return false;
171 }
172 return true;
173 }
174
175 /*
176 * Insert entry into sorted array
177 *
178 * *base: base address of array (changeable if must enlarge array)
179 * *arraysize: allocated length of array (changeable if must enlarge array)
180 * n: current number of valid elements in array
181 * entry: new data to insert
182 * override: true if OK to override
183 *
184 * Returns the new array length (new value for n), or -1 if error
185 */
186 static int
addToArray(tzEntry ** base,int * arraysize,int n,tzEntry * entry,bool override)187 addToArray(tzEntry **base, int *arraysize, int n,
188 tzEntry *entry, bool override)
189 {
190 tzEntry *arrayptr;
191 int low;
192 int high;
193
194 /*
195 * Search the array for a duplicate; as a useful side effect, the array is
196 * maintained in sorted order. We use strcmp() to ensure we match the
197 * sort order datetime.c expects.
198 */
199 arrayptr = *base;
200 low = 0;
201 high = n - 1;
202 while (low <= high)
203 {
204 int mid = (low + high) >> 1;
205 tzEntry *midptr = arrayptr + mid;
206 int cmp;
207
208 cmp = strcmp(entry->abbrev, midptr->abbrev);
209 if (cmp < 0)
210 high = mid - 1;
211 else if (cmp > 0)
212 low = mid + 1;
213 else
214 {
215 /*
216 * Found a duplicate entry; complain unless it's the same.
217 */
218 if ((midptr->zone == NULL && entry->zone == NULL &&
219 midptr->offset == entry->offset &&
220 midptr->is_dst == entry->is_dst) ||
221 (midptr->zone != NULL && entry->zone != NULL &&
222 strcmp(midptr->zone, entry->zone) == 0))
223 {
224 /* return unchanged array */
225 return n;
226 }
227 if (override)
228 {
229 /* same abbrev but something is different, override */
230 midptr->zone = entry->zone;
231 midptr->offset = entry->offset;
232 midptr->is_dst = entry->is_dst;
233 return n;
234 }
235 /* same abbrev but something is different, complain */
236 GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
237 entry->abbrev);
238 GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
239 midptr->filename, midptr->lineno,
240 entry->filename, entry->lineno);
241 return -1;
242 }
243 }
244
245 /*
246 * No match, insert at position "low".
247 */
248 if (n >= *arraysize)
249 {
250 *arraysize *= 2;
251 *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
252 }
253
254 arrayptr = *base + low;
255
256 memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
257
258 memcpy(arrayptr, entry, sizeof(tzEntry));
259
260 return n + 1;
261 }
262
263 /*
264 * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
265 *
266 * filename: user-specified file name (does not include path)
267 * depth: current recursion depth
268 * *base: array for results (changeable if must enlarge array)
269 * *arraysize: allocated length of array (changeable if must enlarge array)
270 * n: current number of valid elements in array
271 *
272 * Returns the new array length (new value for n), or -1 if error
273 */
274 static int
ParseTzFile(const char * filename,int depth,tzEntry ** base,int * arraysize,int n)275 ParseTzFile(const char *filename, int depth,
276 tzEntry **base, int *arraysize, int n)
277 {
278 char share_path[MAXPGPATH];
279 char file_path[MAXPGPATH];
280 FILE *tzFile;
281 char tzbuf[1024];
282 char *line;
283 tzEntry tzentry;
284 int lineno = 0;
285 bool override = false;
286 const char *p;
287
288 /*
289 * We enforce that the filename is all alpha characters. This may be
290 * overly restrictive, but we don't want to allow access to anything
291 * outside the timezonesets directory, so for instance '/' *must* be
292 * rejected.
293 */
294 for (p = filename; *p; p++)
295 {
296 if (!isalpha((unsigned char) *p))
297 {
298 /* at level 0, just use guc.c's regular "invalid value" message */
299 if (depth > 0)
300 GUC_check_errmsg("invalid time zone file name \"%s\"",
301 filename);
302 return -1;
303 }
304 }
305
306 /*
307 * The maximal recursion depth is a pretty arbitrary setting. It is hard
308 * to imagine that someone needs more than 3 levels so stick with this
309 * conservative setting until someone complains.
310 */
311 if (depth > 3)
312 {
313 GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
314 filename);
315 return -1;
316 }
317
318 get_share_path(my_exec_path, share_path);
319 snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
320 share_path, filename);
321 tzFile = AllocateFile(file_path, "r");
322 if (!tzFile)
323 {
324 /*
325 * Check to see if the problem is not the filename but the directory.
326 * This is worth troubling over because if the installation share/
327 * directory is missing or unreadable, this is likely to be the first
328 * place we notice a problem during postmaster startup.
329 */
330 int save_errno = errno;
331 DIR *tzdir;
332
333 snprintf(file_path, sizeof(file_path), "%s/timezonesets",
334 share_path);
335 tzdir = AllocateDir(file_path);
336 if (tzdir == NULL)
337 {
338 GUC_check_errmsg("could not open directory \"%s\": %m",
339 file_path);
340 GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
341 my_exec_path);
342 return -1;
343 }
344 FreeDir(tzdir);
345 errno = save_errno;
346
347 /*
348 * otherwise, if file doesn't exist and it's level 0, guc.c's
349 * complaint is enough
350 */
351 if (errno != ENOENT || depth > 0)
352 GUC_check_errmsg("could not read time zone file \"%s\": %m",
353 filename);
354
355 return -1;
356 }
357
358 while (!feof(tzFile))
359 {
360 lineno++;
361 if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
362 {
363 if (ferror(tzFile))
364 {
365 GUC_check_errmsg("could not read time zone file \"%s\": %m",
366 filename);
367 return -1;
368 }
369 /* else we're at EOF after all */
370 break;
371 }
372 if (strlen(tzbuf) == sizeof(tzbuf) - 1)
373 {
374 /* the line is too long for tzbuf */
375 GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
376 filename, lineno);
377 return -1;
378 }
379
380 /* skip over whitespace */
381 line = tzbuf;
382 while (*line && isspace((unsigned char) *line))
383 line++;
384
385 if (*line == '\0') /* empty line */
386 continue;
387 if (*line == '#') /* comment line */
388 continue;
389
390 if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
391 {
392 /* pstrdup so we can use filename in result data structure */
393 char *includeFile = pstrdup(line + strlen("@INCLUDE"));
394
395 includeFile = strtok(includeFile, WHITESPACE);
396 if (!includeFile || !*includeFile)
397 {
398 GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
399 filename, lineno);
400 return -1;
401 }
402 n = ParseTzFile(includeFile, depth + 1,
403 base, arraysize, n);
404 if (n < 0)
405 return -1;
406 continue;
407 }
408
409 if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
410 {
411 override = true;
412 continue;
413 }
414
415 if (!splitTzLine(filename, lineno, line, &tzentry))
416 return -1;
417 if (!validateTzEntry(&tzentry))
418 return -1;
419 n = addToArray(base, arraysize, n, &tzentry, override);
420 if (n < 0)
421 return -1;
422 }
423
424 FreeFile(tzFile);
425
426 return n;
427 }
428
429 /*
430 * load_tzoffsets --- read and parse the specified timezone offset file
431 *
432 * On success, return a filled-in TimeZoneAbbrevTable, which must have been
433 * malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
434 * and friends to give details of the problem.
435 */
436 TimeZoneAbbrevTable *
load_tzoffsets(const char * filename)437 load_tzoffsets(const char *filename)
438 {
439 TimeZoneAbbrevTable *result = NULL;
440 MemoryContext tmpContext;
441 MemoryContext oldContext;
442 tzEntry *array;
443 int arraysize;
444 int n;
445
446 /*
447 * Create a temp memory context to work in. This makes it easy to clean
448 * up afterwards.
449 */
450 tmpContext = AllocSetContextCreate(CurrentMemoryContext,
451 "TZParserMemory",
452 ALLOCSET_SMALL_SIZES);
453 oldContext = MemoryContextSwitchTo(tmpContext);
454
455 /* Initialize array at a reasonable size */
456 arraysize = 128;
457 array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
458
459 /* Parse the file(s) */
460 n = ParseTzFile(filename, 0, &array, &arraysize, 0);
461
462 /* If no errors so far, let datetime.c allocate memory & convert format */
463 if (n >= 0)
464 {
465 result = ConvertTimeZoneAbbrevs(array, n);
466 if (!result)
467 GUC_check_errmsg("out of memory");
468 }
469
470 /* Clean up */
471 MemoryContextSwitchTo(oldContext);
472 MemoryContextDelete(tmpContext);
473
474 return result;
475 }
476